From 684be25c52a1e43638ced160be0b0b46596e7f2b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 21 Apr 2010 20:45:59 -0700 Subject: ceph: fix seq counting for skipped messages Increment in_seq even when the message is skipped for some reason. Signed-off-by: Sage Weil --- fs/ceph/messenger.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ceph/messenger.c') diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index cdaaa131add3..e7b91e093f54 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -1379,6 +1379,7 @@ static int read_partial_message(struct ceph_connection *con) con->in_base_pos = -front_len - middle_len - data_len - sizeof(m->footer); con->in_tag = CEPH_MSGR_TAG_READY; + con->in_seq++; return 0; } if (IS_ERR(con->in_msg)) { @@ -2030,6 +2031,7 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) ceph_msg_put(con->in_msg); con->in_msg = NULL; con->in_tag = CEPH_MSGR_TAG_READY; + con->in_seq++; } else { dout("con_revoke_pages %p msg %p pages %p no-op\n", con, con->in_msg, msg); -- cgit v1.2.3 From ae18756b9fa7bb93132cff06cd8575e3d46633f9 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 22 Apr 2010 07:47:01 -0700 Subject: ceph: discard incoming messages with bad seq # We can get old message seq #'s after a tcp reconnect for stateful sessions (i.e., the MDS). If we get a higher seq #, that is an error, and we shouldn't see any bad seq #'s for stateless (mon, osd) connections. Signed-off-by: Sage Weil --- fs/ceph/messenger.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'fs/ceph/messenger.c') diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index e7b91e093f54..509f57d9ccb3 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -1334,6 +1334,7 @@ static int read_partial_message(struct ceph_connection *con) unsigned front_len, middle_len, data_len, data_off; int datacrc = con->msgr->nocrc; int skip; + u64 seq; dout("read_partial_message con %p msg %p\n", con, m); @@ -1368,6 +1369,25 @@ static int read_partial_message(struct ceph_connection *con) return -EIO; data_off = le16_to_cpu(con->in_hdr.data_off); + /* verify seq# */ + seq = le64_to_cpu(con->in_hdr.seq); + if ((s64)seq - (s64)con->in_seq < 1) { + pr_info("skipping %s%lld %s seq %lld, expected %lld\n", + ENTITY_NAME(con->peer_name), + pr_addr(&con->peer_addr.in_addr), + seq, con->in_seq + 1); + con->in_base_pos = -front_len - middle_len - data_len - + sizeof(m->footer); + con->in_tag = CEPH_MSGR_TAG_READY; + con->in_seq++; + return 0; + } else if ((s64)seq - (s64)con->in_seq > 1) { + pr_err("read_partial_message bad seq %lld expected %lld\n", + seq, con->in_seq + 1); + con->error_msg = "bad message sequence # for incoming message"; + return -EBADMSG; + } + /* allocate message? */ if (!con->in_msg) { dout("got hdr type %d front %d data %d\n", con->in_hdr.type, -- cgit v1.2.3 From 45c6ceb547ad2d98215351974a4686bf8cb13e14 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 11 May 2010 15:01:51 -0700 Subject: ceph: zero unused message header, footer fields We shouldn't leak any prior memory contents to other parties. And random data, particularly in the 'version' field, can cause problems down the line. Signed-off-by: Sage Weil --- fs/ceph/messenger.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/ceph/messenger.c') diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 509f57d9ccb3..a3a8f368845b 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -2085,15 +2085,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, kref_init(&m->kref); INIT_LIST_HEAD(&m->list_head); + m->hdr.tid = 0; m->hdr.type = cpu_to_le16(type); + m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); + m->hdr.version = 0; m->hdr.front_len = cpu_to_le32(front_len); m->hdr.middle_len = 0; m->hdr.data_len = cpu_to_le32(page_len); m->hdr.data_off = cpu_to_le16(page_off); - m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); + m->hdr.reserved = 0; m->footer.front_crc = 0; m->footer.middle_crc = 0; m->footer.data_crc = 0; + m->footer.flags = 0; m->front_max = front_len; m->front_is_vmalloc = false; m->more_to_follow = false; -- cgit v1.2.3 From e84346b726ea90a8ed470bc81c4136a7b8710ea5 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 11 May 2010 21:20:38 -0700 Subject: ceph: preserve seq # on requeued messages after transient transport errors If the tcp connection drops and we reconnect to reestablish a stateful session (with the mds), we need to resend previously sent (and possibly received) messages with the _same_ seq # so that they can be dropped on the other end if needed. Only assign a new seq once after the message is queued. Signed-off-by: Sage Weil --- fs/ceph/messenger.c | 11 ++++++++++- fs/ceph/messenger.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'fs/ceph/messenger.c') diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index a3a8f368845b..cd4fadb6491a 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -492,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con) list_move_tail(&m->list_head, &con->out_sent); } - m->hdr.seq = cpu_to_le64(++con->out_seq); + /* + * only assign outgoing seq # if we haven't sent this message + * yet. if it is requeued, resend with it's original seq. + */ + if (m->needs_out_seq) { + m->hdr.seq = cpu_to_le64(++con->out_seq); + m->needs_out_seq = false; + } dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", m, con->out_seq, le16_to_cpu(m->hdr.type), @@ -1986,6 +1993,8 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); + msg->needs_out_seq = true; + /* queue */ mutex_lock(&con->mutex); BUG_ON(!list_empty(&msg->list_head)); diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index a343dae73cdc..a5caf91cc971 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -86,6 +86,7 @@ struct ceph_msg { struct kref kref; bool front_is_vmalloc; bool more_to_follow; + bool needs_out_seq; int front_max; struct ceph_msgpool *pool; -- cgit v1.2.3