diff options
Diffstat (limited to 'net/ceph/messenger.c')
-rw-r--r-- | net/ceph/messenger.c | 130 |
1 files changed, 64 insertions, 66 deletions
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 3ef1759403b4..5ccf87ed8d68 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -506,6 +506,7 @@ static void reset_connection(struct ceph_connection *con) { /* reset connection, out_queue, msg_ and connect_seq */ /* discard existing out_queue and msg_seq */ + dout("reset_connection %p\n", con); ceph_msg_remove_list(&con->out_queue); ceph_msg_remove_list(&con->out_sent); @@ -561,7 +562,7 @@ void ceph_con_open(struct ceph_connection *con, mutex_lock(&con->mutex); dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); - BUG_ON(con->state != CON_STATE_CLOSED); + WARN_ON(con->state != CON_STATE_CLOSED); con->state = CON_STATE_PREOPEN; con->peer_name.type = (__u8) entity_type; @@ -1506,13 +1507,6 @@ static int process_banner(struct ceph_connection *con) return 0; } -static void fail_protocol(struct ceph_connection *con) -{ - reset_connection(con); - BUG_ON(con->state != CON_STATE_NEGOTIATING); - con->state = CON_STATE_CLOSED; -} - static int process_connect(struct ceph_connection *con) { u64 sup_feat = con->msgr->supported_features; @@ -1530,7 +1524,7 @@ static int process_connect(struct ceph_connection *con) ceph_pr_addr(&con->peer_addr.in_addr), sup_feat, server_feat, server_feat & ~sup_feat); con->error_msg = "missing required protocol features"; - fail_protocol(con); + reset_connection(con); return -1; case CEPH_MSGR_TAG_BADPROTOVER: @@ -1541,7 +1535,7 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->out_connect.protocol_version), le32_to_cpu(con->in_reply.protocol_version)); con->error_msg = "protocol version mismatch"; - fail_protocol(con); + reset_connection(con); return -1; case CEPH_MSGR_TAG_BADAUTHORIZER: @@ -1631,11 +1625,11 @@ static int process_connect(struct ceph_connection *con) ceph_pr_addr(&con->peer_addr.in_addr), req_feat, server_feat, req_feat & ~server_feat); con->error_msg = "missing required protocol features"; - fail_protocol(con); + reset_connection(con); return -1; } - BUG_ON(con->state != CON_STATE_NEGOTIATING); + WARN_ON(con->state != CON_STATE_NEGOTIATING); con->state = CON_STATE_OPEN; con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); @@ -2132,7 +2126,6 @@ more: if (ret < 0) goto out; - BUG_ON(con->state != CON_STATE_CONNECTING); con->state = CON_STATE_NEGOTIATING; /* @@ -2160,7 +2153,7 @@ more: goto more; } - BUG_ON(con->state != CON_STATE_OPEN); + WARN_ON(con->state != CON_STATE_OPEN); if (con->in_base_pos < 0) { /* @@ -2244,22 +2237,62 @@ bad_tag: /* - * Atomically queue work on a connection. Bump @con reference to - * avoid races with connection teardown. + * Atomically queue work on a connection after the specified delay. + * Bump @con reference to avoid races with connection teardown. + * Returns 0 if work was queued, or an error code otherwise. */ -static void queue_con(struct ceph_connection *con) +static int queue_con_delay(struct ceph_connection *con, unsigned long delay) { if (!con->ops->get(con)) { - dout("queue_con %p ref count 0\n", con); - return; + dout("%s %p ref count 0\n", __func__, con); + + return -ENOENT; } - if (!queue_delayed_work(ceph_msgr_wq, &con->work, 0)) { - dout("queue_con %p - already queued\n", con); + if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) { + dout("%s %p - already queued\n", __func__, con); con->ops->put(con); - } else { - dout("queue_con %p\n", con); + + return -EBUSY; + } + + dout("%s %p %lu\n", __func__, con, delay); + + return 0; +} + +static void queue_con(struct ceph_connection *con) +{ + (void) queue_con_delay(con, 0); +} + +static bool con_sock_closed(struct ceph_connection *con) +{ + if (!test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) + return false; + +#define CASE(x) \ + case CON_STATE_ ## x: \ + con->error_msg = "socket closed (con state " #x ")"; \ + break; + + switch (con->state) { + CASE(CLOSED); + CASE(PREOPEN); + CASE(CONNECTING); + CASE(NEGOTIATING); + CASE(OPEN); + CASE(STANDBY); + default: + pr_warning("%s con %p unrecognized state %lu\n", + __func__, con, con->state); + con->error_msg = "unrecognized con state"; + BUG(); + break; } +#undef CASE + + return true; } /* @@ -2273,35 +2306,16 @@ static void con_work(struct work_struct *work) mutex_lock(&con->mutex); restart: - if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) { - switch (con->state) { - case CON_STATE_CONNECTING: - con->error_msg = "connection failed"; - break; - case CON_STATE_NEGOTIATING: - con->error_msg = "negotiation failed"; - break; - case CON_STATE_OPEN: - con->error_msg = "socket closed"; - break; - default: - dout("unrecognized con state %d\n", (int)con->state); - con->error_msg = "unrecognized con state"; - BUG(); - } + if (con_sock_closed(con)) goto fault; - } if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) { dout("con_work %p backing off\n", con); - if (queue_delayed_work(ceph_msgr_wq, &con->work, - round_jiffies_relative(con->delay))) { - dout("con_work %p backoff %lu\n", con, con->delay); - mutex_unlock(&con->mutex); - return; - } else { + ret = queue_con_delay(con, round_jiffies_relative(con->delay)); + if (ret) { dout("con_work %p FAILED to back off %lu\n", con, con->delay); + BUG_ON(ret == -ENOENT); set_bit(CON_FLAG_BACKOFF, &con->flags); } goto done; @@ -2356,12 +2370,12 @@ fault: static void ceph_fault(struct ceph_connection *con) __releases(con->mutex) { - pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), + pr_warning("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); dout("fault %p state %lu to peer %s\n", con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); - BUG_ON(con->state != CON_STATE_CONNECTING && + WARN_ON(con->state != CON_STATE_CONNECTING && con->state != CON_STATE_NEGOTIATING && con->state != CON_STATE_OPEN); @@ -2398,24 +2412,8 @@ static void ceph_fault(struct ceph_connection *con) con->delay = BASE_DELAY_INTERVAL; else if (con->delay < MAX_DELAY_INTERVAL) con->delay *= 2; - con->ops->get(con); - if (queue_delayed_work(ceph_msgr_wq, &con->work, - round_jiffies_relative(con->delay))) { - dout("fault queued %p delay %lu\n", con, con->delay); - } else { - con->ops->put(con); - dout("fault failed to queue %p delay %lu, backoff\n", - con, con->delay); - /* - * In many cases we see a socket state change - * while con_work is running and end up - * queuing (non-delayed) work, such that we - * can't backoff with a delay. Set a flag so - * that when con_work restarts we schedule the - * delay then. - */ - set_bit(CON_FLAG_BACKOFF, &con->flags); - } + set_bit(CON_FLAG_BACKOFF, &con->flags); + queue_con(con); } out_unlock: |