summaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
authorYan, Zheng <zyan@redhat.com>2015-06-10 09:17:56 +0200
committerIlya Dryomov <idryomov@gmail.com>2015-06-25 10:49:31 +0200
commite548e9b93d3e565e42b938a99804114565be1f81 (patch)
tree13de716de6a024a26ad3930312e67685299e44c6 /fs/ceph
parentceph: send TID of the oldest pending caps flush to MDS (diff)
downloadlinux-e548e9b93d3e565e42b938a99804114565be1f81.tar.xz
linux-e548e9b93d3e565e42b938a99804114565be1f81.zip
ceph: re-send flushing caps (which are revoked) in reconnect stage
if flushing caps were revoked, we should re-send the cap flush in client reconnect stage. This guarantees that MDS processes the cap flush message before issuing the flushing caps to other client. Signed-off-by: Yan, Zheng <zyan@redhat.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/caps.c57
-rw-r--r--fs/ceph/mds_client.c3
-rw-r--r--fs/ceph/super.h7
3 files changed, 61 insertions, 6 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 420272788e01..69a16044ec41 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1486,6 +1486,7 @@ static int __mark_caps_flushing(struct inode *inode,
cf = kmalloc(sizeof(*cf), GFP_ATOMIC);
cf->caps = flushing;
+ cf->kick = false;
spin_lock(&mdsc->cap_dirty_lock);
list_del_init(&ci->i_dirty_item);
@@ -2101,7 +2102,8 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
static int __kick_flushing_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session,
- struct ceph_inode_info *ci)
+ struct ceph_inode_info *ci,
+ bool kick_all)
{
struct inode *inode = &ci->vfs_inode;
struct ceph_cap *cap;
@@ -2127,7 +2129,9 @@ static int __kick_flushing_caps(struct ceph_mds_client *mdsc,
for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) {
cf = rb_entry(n, struct ceph_cap_flush, i_node);
- if (cf->tid >= first_tid)
+ if (cf->tid < first_tid)
+ continue;
+ if (kick_all || cf->kick)
break;
}
if (!n) {
@@ -2136,6 +2140,8 @@ static int __kick_flushing_caps(struct ceph_mds_client *mdsc,
}
cf = rb_entry(n, struct ceph_cap_flush, i_node);
+ cf->kick = false;
+
first_tid = cf->tid + 1;
dout("kick_flushing_caps %p cap %p tid %llu %s\n", inode,
@@ -2149,6 +2155,49 @@ static int __kick_flushing_caps(struct ceph_mds_client *mdsc,
return delayed;
}
+void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_inode_info *ci;
+ struct ceph_cap *cap;
+ struct ceph_cap_flush *cf;
+ struct rb_node *n;
+
+ dout("early_kick_flushing_caps mds%d\n", session->s_mds);
+ list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
+ spin_lock(&ci->i_ceph_lock);
+ cap = ci->i_auth_cap;
+ if (!(cap && cap->session == session)) {
+ pr_err("%p auth cap %p not mds%d ???\n",
+ &ci->vfs_inode, cap, session->s_mds);
+ spin_unlock(&ci->i_ceph_lock);
+ continue;
+ }
+
+
+ /*
+ * if flushing caps were revoked, we re-send the cap flush
+ * in client reconnect stage. This guarantees MDS * processes
+ * the cap flush message before issuing the flushing caps to
+ * other client.
+ */
+ if ((cap->issued & ci->i_flushing_caps) !=
+ ci->i_flushing_caps) {
+ spin_unlock(&ci->i_ceph_lock);
+ if (!__kick_flushing_caps(mdsc, session, ci, true))
+ continue;
+ spin_lock(&ci->i_ceph_lock);
+ }
+
+ for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) {
+ cf = rb_entry(n, struct ceph_cap_flush, i_node);
+ cf->kick = true;
+ }
+
+ spin_unlock(&ci->i_ceph_lock);
+ }
+}
+
void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session)
{
@@ -2158,7 +2207,7 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
dout("kick_flushing_caps mds%d\n", session->s_mds);
list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
- int delayed = __kick_flushing_caps(mdsc, session, ci);
+ int delayed = __kick_flushing_caps(mdsc, session, ci, false);
if (delayed) {
spin_lock(&ci->i_ceph_lock);
__cap_delay_requeue(mdsc, ci);
@@ -2191,7 +2240,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
spin_unlock(&ci->i_ceph_lock);
- delayed = __kick_flushing_caps(mdsc, session, ci);
+ delayed = __kick_flushing_caps(mdsc, session, ci, true);
if (delayed) {
spin_lock(&ci->i_ceph_lock);
__cap_delay_requeue(mdsc, ci);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 31f6a78caa0a..89e4305a94d4 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2982,6 +2982,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
reply->hdr.data_len = cpu_to_le32(pagelist->length);
ceph_msg_data_add_pagelist(reply, pagelist);
+
+ ceph_early_kick_flushing_caps(mdsc, session);
+
ceph_con_send(&session->s_con, reply);
mutex_unlock(&session->s_mutex);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 94d91471165f..e7f13f742357 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -189,9 +189,10 @@ static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
struct ceph_cap_flush {
u64 tid;
int caps;
- struct rb_node g_node;
+ bool kick;
+ struct rb_node g_node; // global
union {
- struct rb_node i_node;
+ struct rb_node i_node; // inode
struct list_head list;
};
};
@@ -868,6 +869,8 @@ extern void ceph_queue_caps_release(struct inode *inode);
extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
extern int ceph_fsync(struct file *file, loff_t start, loff_t end,
int datasync);
+extern void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session);
extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,