diff options
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r-- | fs/ceph/caps.c | 140 |
1 files changed, 80 insertions, 60 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7bf182b03973..98ab13e2b71d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1,4 +1,4 @@ -#include "ceph_debug.h" +#include <linux/ceph/ceph_debug.h> #include <linux/fs.h> #include <linux/kernel.h> @@ -9,8 +9,9 @@ #include <linux/writeback.h> #include "super.h" -#include "decode.h" -#include "messenger.h" +#include "mds_client.h" +#include <linux/ceph/decode.h> +#include <linux/ceph/messenger.h> /* * Capability management @@ -287,11 +288,11 @@ void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap) spin_unlock(&mdsc->caps_list_lock); } -void ceph_reservation_status(struct ceph_client *client, +void ceph_reservation_status(struct ceph_fs_client *fsc, int *total, int *avail, int *used, int *reserved, int *min) { - struct ceph_mds_client *mdsc = &client->mdsc; + struct ceph_mds_client *mdsc = fsc->mdsc; if (total) *total = mdsc->caps_total_count; @@ -399,7 +400,7 @@ static void __insert_cap_node(struct ceph_inode_info *ci, static void __cap_set_timeouts(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { - struct ceph_mount_args *ma = mdsc->client->mount_args; + struct ceph_mount_options *ma = mdsc->fsc->mount_options; ci->i_hold_caps_min = round_jiffies(jiffies + ma->caps_wanted_delay_min * HZ); @@ -515,7 +516,7 @@ int ceph_add_cap(struct inode *inode, unsigned seq, unsigned mseq, u64 realmino, int flags, struct ceph_cap_reservation *caps_reservation) { - struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap *new_cap = NULL; struct ceph_cap *cap; @@ -814,7 +815,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci) used |= CEPH_CAP_PIN; if (ci->i_rd_ref) used |= CEPH_CAP_FILE_RD; - if (ci->i_rdcache_ref || ci->i_rdcache_gen) + if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages) used |= CEPH_CAP_FILE_CACHE; if (ci->i_wr_ref) used |= CEPH_CAP_FILE_WR; @@ -873,7 +874,7 @@ void __ceph_remove_cap(struct ceph_cap *cap) struct ceph_mds_session *session = cap->session; struct ceph_inode_info *ci = cap->ci; struct ceph_mds_client *mdsc = - &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; + ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; int removed = 0; dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); @@ -1082,6 +1083,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, gid_t gid; struct ceph_mds_session *session; u64 xattr_version = 0; + struct ceph_buffer *xattr_blob = NULL; int delayed = 0; u64 flush_tid = 0; int i; @@ -1142,6 +1144,10 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, for (i = 0; i < CEPH_CAP_BITS; i++) if (flushing & (1 << i)) ci->i_cap_flush_tid[i] = flush_tid; + + follows = ci->i_head_snapc->seq; + } else { + follows = 0; } keep = cap->implemented; @@ -1155,14 +1161,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, mtime = inode->i_mtime; atime = inode->i_atime; time_warp_seq = ci->i_time_warp_seq; - follows = ci->i_snap_realm->cached_context->seq; uid = inode->i_uid; gid = inode->i_gid; mode = inode->i_mode; - if (dropping & CEPH_CAP_XATTR_EXCL) { + if (flushing & CEPH_CAP_XATTR_EXCL) { __ceph_build_xattrs_blob(ci); - xattr_version = ci->i_xattrs.version + 1; + xattr_blob = ci->i_xattrs.blob; + xattr_version = ci->i_xattrs.version; } spin_unlock(&inode->i_lock); @@ -1170,9 +1176,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, size, max_size, &mtime, &atime, time_warp_seq, - uid, gid, mode, - xattr_version, - (flushing & CEPH_CAP_XATTR_EXCL) ? ci->i_xattrs.blob : NULL, + uid, gid, mode, xattr_version, xattr_blob, follows); if (ret < 0) { dout("error sending cap msg, must requeue %p\n", inode); @@ -1192,10 +1196,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, * asynchronously back to the MDS once sync writes complete and dirty * data is written out. * + * Unless @again is true, skip cap_snaps that were already sent to + * the MDS (i.e., during this session). + * * Called under i_lock. Takes s_mutex as needed. */ void __ceph_flush_snaps(struct ceph_inode_info *ci, - struct ceph_mds_session **psession) + struct ceph_mds_session **psession, + int again) __releases(ci->vfs_inode->i_lock) __acquires(ci->vfs_inode->i_lock) { @@ -1203,7 +1211,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci, int mds; struct ceph_cap_snap *capsnap; u32 mseq; - struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_mds_session *session = NULL; /* if session != NULL, we hold session->s_mutex */ u64 next_follows = 0; /* keep track of how far we've gotten through the @@ -1224,7 +1232,7 @@ retry: * pages to be written out. */ if (capsnap->dirty_pages || capsnap->writing) - continue; + break; /* * if cap writeback already occurred, we should have dropped @@ -1237,6 +1245,13 @@ retry: dout("no auth cap (migrating?), doing nothing\n"); goto out; } + + /* only flush each capsnap once */ + if (!again && !list_empty(&capsnap->flushing_item)) { + dout("already flushed %p, skipping\n", capsnap); + continue; + } + mds = ci->i_auth_cap->session->s_mds; mseq = ci->i_auth_cap->mseq; @@ -1273,8 +1288,8 @@ retry: &session->s_cap_snaps_flushing); spin_unlock(&inode->i_lock); - dout("flush_snaps %p cap_snap %p follows %lld size %llu\n", - inode, capsnap, next_follows, capsnap->size); + dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", + inode, capsnap, capsnap->follows, capsnap->flush_tid); send_cap_msg(session, ceph_vino(inode).ino, 0, CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, @@ -1282,7 +1297,7 @@ retry: &capsnap->mtime, &capsnap->atime, capsnap->time_warp_seq, capsnap->uid, capsnap->gid, capsnap->mode, - 0, NULL, + capsnap->xattr_version, capsnap->xattr_blob, capsnap->follows); next_follows = capsnap->follows + 1; @@ -1311,7 +1326,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) struct inode *inode = &ci->vfs_inode; spin_lock(&inode->i_lock); - __ceph_flush_snaps(ci, NULL); + __ceph_flush_snaps(ci, NULL, 0); spin_unlock(&inode->i_lock); } @@ -1322,7 +1337,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) { struct ceph_mds_client *mdsc = - &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; + ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; struct inode *inode = &ci->vfs_inode; int was = ci->i_dirty_caps; int dirty = 0; @@ -1332,7 +1347,11 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) ceph_cap_string(was | mask)); ci->i_dirty_caps |= mask; if (was == 0) { - dout(" inode %p now dirty\n", &ci->vfs_inode); + if (!ci->i_head_snapc) + ci->i_head_snapc = ceph_get_snap_context( + ci->i_snap_realm->cached_context); + dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode, + ci->i_head_snapc); BUG_ON(!list_empty(&ci->i_dirty_item)); spin_lock(&mdsc->cap_dirty_lock); list_add(&ci->i_dirty_item, &mdsc->cap_dirty); @@ -1360,7 +1379,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) static int __mark_caps_flushing(struct inode *inode, struct ceph_mds_session *session) { - struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); int flushing; @@ -1398,17 +1417,6 @@ static int __mark_caps_flushing(struct inode *inode, /* * try to invalidate mapping pages without blocking. */ -static int mapping_is_empty(struct address_space *mapping) -{ - struct page *page = find_get_page(mapping, 0); - - if (!page) - return 1; - - put_page(page); - return 0; -} - static int try_nonblocking_invalidate(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); @@ -1418,7 +1426,7 @@ static int try_nonblocking_invalidate(struct inode *inode) invalidate_mapping_pages(&inode->i_data, 0, -1); spin_lock(&inode->i_lock); - if (mapping_is_empty(&inode->i_data) && + if (inode->i_data.nrpages == 0 && invalidating_gen == ci->i_rdcache_gen) { /* success. */ dout("try_nonblocking_invalidate %p success\n", inode); @@ -1444,8 +1452,8 @@ static int try_nonblocking_invalidate(struct inode *inode) void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_session *session) { - struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); - struct ceph_mds_client *mdsc = &client->mdsc; + struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode); + struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = &ci->vfs_inode; struct ceph_cap *cap; int file_wanted, used; @@ -1470,7 +1478,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, /* flush snaps first time around only */ if (!list_empty(&ci->i_cap_snaps)) - __ceph_flush_snaps(ci, &session); + __ceph_flush_snaps(ci, &session, 0); goto retry_locked; retry: spin_lock(&inode->i_lock); @@ -1515,7 +1523,7 @@ retry_locked: */ if ((!is_delayed || mdsc->stopping) && ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ - ci->i_rdcache_gen && /* may have cached pages */ + inode->i_data.nrpages && /* have cached pages */ (file_wanted == 0 || /* no open files */ (revoking & (CEPH_CAP_FILE_CACHE| CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ @@ -1688,7 +1696,7 @@ ack: static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, unsigned *flush_tid) { - struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); int unlock_session = session ? 0 : 1; int flushing = 0; @@ -1854,7 +1862,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) caps_are_flushed(inode, flush_tid)); } else { struct ceph_mds_client *mdsc = - &ceph_sb_to_client(inode->i_sb)->mdsc; + ceph_sb_to_client(inode->i_sb)->mdsc; spin_lock(&inode->i_lock); if (__ceph_caps_dirty(ci)) @@ -1887,7 +1895,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, if (cap && cap->session == session) { dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, cap, capsnap); - __ceph_flush_snaps(ci, &session); + __ceph_flush_snaps(ci, &session, 1); } else { pr_err("%p auth cap %p not mds%d ???\n", inode, cap, session->s_mds); @@ -2190,7 +2198,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, if (ci->i_head_snapc == snapc) { ci->i_wrbuffer_ref_head -= nr; - if (!ci->i_wrbuffer_ref_head) { + if (ci->i_wrbuffer_ref_head == 0 && + ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) { + BUG_ON(!ci->i_head_snapc); ceph_put_snap_context(ci->i_head_snapc); ci->i_head_snapc = NULL; } @@ -2263,7 +2273,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, { struct ceph_inode_info *ci = ceph_inode(inode); int mds = session->s_mds; - int seq = le32_to_cpu(grant->seq); + unsigned seq = le32_to_cpu(grant->seq); + unsigned issue_seq = le32_to_cpu(grant->issue_seq); int newcaps = le32_to_cpu(grant->caps); int issued, implemented, used, wanted, dirty; u64 size = le64_to_cpu(grant->size); @@ -2275,8 +2286,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, int revoked_rdcache = 0; int queue_invalidate = 0; - dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", - inode, cap, mds, seq, ceph_cap_string(newcaps)); + dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n", + inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps)); dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, inode->i_size); @@ -2372,6 +2383,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, } cap->seq = seq; + cap->issue_seq = issue_seq; /* file layout may have changed */ ci->i_layout = grant->layout; @@ -2443,7 +2455,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, __releases(inode->i_lock) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; unsigned seq = le32_to_cpu(m->seq); int dirty = le32_to_cpu(m->dirty); int cleaned = 0; @@ -2483,6 +2495,11 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, dout(" inode %p now clean\n", inode); BUG_ON(!list_empty(&ci->i_dirty_item)); drop = 1; + if (ci->i_wrbuffer_ref_head == 0) { + BUG_ON(!ci->i_head_snapc); + ceph_put_snap_context(ci->i_head_snapc); + ci->i_head_snapc = NULL; + } } else { BUG_ON(list_empty(&ci->i_dirty_item)); } @@ -2686,7 +2703,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_msg *msg) { struct ceph_mds_client *mdsc = session->s_mdsc; - struct super_block *sb = mdsc->client->sb; + struct super_block *sb = mdsc->fsc->sb; struct inode *inode; struct ceph_cap *cap; struct ceph_mds_caps *h; @@ -2749,15 +2766,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, if (op == CEPH_CAP_OP_IMPORT) __queue_cap_release(session, vino.ino, cap_id, mseq, seq); - - /* - * send any full release message to try to move things - * along for the mds (who clearly thinks we still have this - * cap). - */ - ceph_add_cap_releases(mdsc, session); - ceph_send_cap_releases(mdsc, session); - goto done; + goto flush_cap_releases; } /* these will work even if we don't have a cap yet */ @@ -2785,7 +2794,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, dout(" no cap on %p ino %llx.%llx from mds%d\n", inode, ceph_ino(inode), ceph_snap(inode), mds); spin_unlock(&inode->i_lock); - goto done; + goto flush_cap_releases; } /* note that each of these drops i_lock for us */ @@ -2809,6 +2818,17 @@ void ceph_handle_caps(struct ceph_mds_session *session, ceph_cap_op_name(op)); } + goto done; + +flush_cap_releases: + /* + * send any full release message to try to move things + * along for the mds (who clearly thinks we still have this + * cap). + */ + ceph_add_cap_releases(mdsc, session); + ceph_send_cap_releases(mdsc, session); + done: mutex_unlock(&session->s_mutex); done_unlocked: |