summaryrefslogtreecommitdiffstats
path: root/fs/ceph/mds_client.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/mds_client.c')
-rw-r--r--fs/ceph/mds_client.c136
1 files changed, 107 insertions, 29 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index bad07c09f91e..a92d3f5c6c12 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -7,6 +7,7 @@
#include <linux/sched.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
+#include <linux/utsname.h>
#include "super.h"
#include "mds_client.h"
@@ -334,7 +335,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
/*
* sessions
*/
-static const char *session_state_name(int s)
+const char *ceph_session_state_name(int s)
{
switch (s) {
case CEPH_MDS_SESSION_NEW: return "new";
@@ -542,6 +543,8 @@ void ceph_mdsc_release_request(struct kref *kref)
}
kfree(req->r_path1);
kfree(req->r_path2);
+ if (req->r_pagelist)
+ ceph_pagelist_release(req->r_pagelist);
put_request_session(req);
ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation);
kfree(req);
@@ -812,6 +815,74 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq)
h = msg->front.iov_base;
h->op = cpu_to_le32(op);
h->seq = cpu_to_le64(seq);
+
+ return msg;
+}
+
+/*
+ * session message, specialization for CEPH_SESSION_REQUEST_OPEN
+ * to include additional client metadata fields.
+ */
+static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u64 seq)
+{
+ struct ceph_msg *msg;
+ struct ceph_mds_session_head *h;
+ int i = -1;
+ int metadata_bytes = 0;
+ int metadata_key_count = 0;
+ struct ceph_options *opt = mdsc->fsc->client->options;
+ void *p;
+
+ const char* metadata[3][2] = {
+ {"hostname", utsname()->nodename},
+ {"entity_id", opt->name ? opt->name : ""},
+ {NULL, NULL}
+ };
+
+ /* Calculate serialized length of metadata */
+ metadata_bytes = 4; /* map length */
+ for (i = 0; metadata[i][0] != NULL; ++i) {
+ metadata_bytes += 8 + strlen(metadata[i][0]) +
+ strlen(metadata[i][1]);
+ metadata_key_count++;
+ }
+
+ /* Allocate the message */
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + metadata_bytes,
+ GFP_NOFS, false);
+ if (!msg) {
+ pr_err("create_session_msg ENOMEM creating msg\n");
+ return NULL;
+ }
+ h = msg->front.iov_base;
+ h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN);
+ h->seq = cpu_to_le64(seq);
+
+ /*
+ * Serialize client metadata into waiting buffer space, using
+ * the format that userspace expects for map<string, string>
+ */
+ msg->hdr.version = 2; /* ClientSession messages with metadata are v2 */
+
+ /* The write pointer, following the session_head structure */
+ p = msg->front.iov_base + sizeof(*h);
+
+ /* Number of entries in the map */
+ ceph_encode_32(&p, metadata_key_count);
+
+ /* Two length-prefixed strings for each entry in the map */
+ for (i = 0; metadata[i][0] != NULL; ++i) {
+ size_t const key_len = strlen(metadata[i][0]);
+ size_t const val_len = strlen(metadata[i][1]);
+
+ ceph_encode_32(&p, key_len);
+ memcpy(p, metadata[i][0], key_len);
+ p += key_len;
+ ceph_encode_32(&p, val_len);
+ memcpy(p, metadata[i][1], val_len);
+ p += val_len;
+ }
+
return msg;
}
@@ -835,7 +906,7 @@ static int __open_session(struct ceph_mds_client *mdsc,
session->s_renew_requested = jiffies;
/* send connect message */
- msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq);
+ msg = create_session_open_msg(mdsc, session->s_seq);
if (!msg)
return -ENOMEM;
ceph_con_send(&session->s_con, msg);
@@ -1164,7 +1235,7 @@ static int send_flushmsg_ack(struct ceph_mds_client *mdsc,
struct ceph_msg *msg;
dout("send_flushmsg_ack to mds%d (%s)s seq %lld\n",
- session->s_mds, session_state_name(session->s_state), seq);
+ session->s_mds, ceph_session_state_name(session->s_state), seq);
msg = create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq);
if (!msg)
return -ENOMEM;
@@ -1216,7 +1287,7 @@ static int request_close_session(struct ceph_mds_client *mdsc,
struct ceph_msg *msg;
dout("request_close_session mds%d state %s seq %lld\n",
- session->s_mds, session_state_name(session->s_state),
+ session->s_mds, ceph_session_state_name(session->s_state),
session->s_seq);
msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq);
if (!msg)
@@ -1847,13 +1918,15 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
msg->front.iov_len = p - msg->front.iov_base;
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
- if (req->r_data_len) {
- /* outbound data set only by ceph_sync_setxattr() */
- BUG_ON(!req->r_pages);
- ceph_msg_data_add_pages(msg, req->r_pages, req->r_data_len, 0);
+ if (req->r_pagelist) {
+ struct ceph_pagelist *pagelist = req->r_pagelist;
+ atomic_inc(&pagelist->refcnt);
+ ceph_msg_data_add_pagelist(msg, pagelist);
+ msg->hdr.data_len = cpu_to_le32(pagelist->length);
+ } else {
+ msg->hdr.data_len = 0;
}
- msg->hdr.data_len = cpu_to_le32(req->r_data_len);
msg->hdr.data_off = cpu_to_le16(0);
out_free2:
@@ -2007,7 +2080,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
req->r_session = get_session(session);
dout("do_request mds%d session %p state %s\n", mds, session,
- session_state_name(session->s_state));
+ ceph_session_state_name(session->s_state));
if (session->s_state != CEPH_MDS_SESSION_OPEN &&
session->s_state != CEPH_MDS_SESSION_HUNG) {
if (session->s_state == CEPH_MDS_SESSION_NEW ||
@@ -2078,6 +2151,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds)
if (req->r_session &&
req->r_session->s_mds == mds) {
dout(" kicking tid %llu\n", req->r_tid);
+ list_del_init(&req->r_wait);
__do_request(mdsc, req);
}
}
@@ -2444,7 +2518,7 @@ static void handle_session(struct ceph_mds_session *session,
dout("handle_session mds%d %s %p state %s seq %llu\n",
mds, ceph_session_op_name(op), session,
- session_state_name(session->s_state), seq);
+ ceph_session_state_name(session->s_state), seq);
if (session->s_state == CEPH_MDS_SESSION_HUNG) {
session->s_state = CEPH_MDS_SESSION_OPEN;
@@ -2471,9 +2545,8 @@ static void handle_session(struct ceph_mds_session *session,
if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
pr_info("mds%d reconnect denied\n", session->s_mds);
remove_session_caps(session);
- wake = 1; /* for good measure */
+ wake = 2; /* for good measure */
wake_up_all(&mdsc->session_close_wq);
- kick_requests(mdsc, mds);
break;
case CEPH_SESSION_STALE:
@@ -2503,6 +2576,8 @@ static void handle_session(struct ceph_mds_session *session,
if (wake) {
mutex_lock(&mdsc->mutex);
__wake_requests(mdsc, &session->s_waiting);
+ if (wake == 2)
+ kick_requests(mdsc, mds);
mutex_unlock(&mdsc->mutex);
}
return;
@@ -2695,18 +2770,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
session->s_state = CEPH_MDS_SESSION_RECONNECTING;
session->s_seq = 0;
- ceph_con_close(&session->s_con);
- ceph_con_open(&session->s_con,
- CEPH_ENTITY_TYPE_MDS, mds,
- ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
-
- /* replay unsafe requests */
- replay_unsafe_requests(mdsc, session);
-
- down_read(&mdsc->snap_rwsem);
-
dout("session %p state %s\n", session,
- session_state_name(session->s_state));
+ ceph_session_state_name(session->s_state));
spin_lock(&session->s_gen_ttl_lock);
session->s_cap_gen++;
@@ -2723,6 +2788,19 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
discard_cap_releases(mdsc, session);
spin_unlock(&session->s_cap_lock);
+ /* trim unused caps to reduce MDS's cache rejoin time */
+ shrink_dcache_parent(mdsc->fsc->sb->s_root);
+
+ ceph_con_close(&session->s_con);
+ ceph_con_open(&session->s_con,
+ CEPH_ENTITY_TYPE_MDS, mds,
+ ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
+
+ /* replay unsafe requests */
+ replay_unsafe_requests(mdsc, session);
+
+ down_read(&mdsc->snap_rwsem);
+
/* traverse this session's caps */
s_nr_caps = session->s_nr_caps;
err = ceph_pagelist_encode_32(pagelist, s_nr_caps);
@@ -2791,7 +2869,6 @@ fail:
mutex_unlock(&session->s_mutex);
fail_nomsg:
ceph_pagelist_release(pagelist);
- kfree(pagelist);
fail_nopagelist:
pr_err("error %d preparing reconnect for mds%d\n", err, mds);
return;
@@ -2827,7 +2904,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "",
ceph_mds_state_name(newstate),
ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
- session_state_name(s->s_state));
+ ceph_session_state_name(s->s_state));
if (i >= newmap->m_max_mds ||
memcmp(ceph_mdsmap_get_addr(oldmap, i),
@@ -2939,14 +3016,15 @@ static void handle_lease(struct ceph_mds_client *mdsc,
if (dname.len != get_unaligned_le32(h+1))
goto bad;
- mutex_lock(&session->s_mutex);
- session->s_seq++;
-
/* lookup inode */
inode = ceph_find_inode(sb, vino);
dout("handle_lease %s, ino %llx %p %.*s\n",
ceph_lease_op_name(h->action), vino.ino, inode,
dname.len, dname.name);
+
+ mutex_lock(&session->s_mutex);
+ session->s_seq++;
+
if (inode == NULL) {
dout("handle_lease no inode %llx\n", vino.ino);
goto release;