diff options
author | Steve Wise <swise@opengridcomputing.com> | 2014-04-09 16:38:25 +0200 |
---|---|---|
committer | Roland Dreier <roland@purestorage.com> | 2014-04-11 20:36:07 +0200 |
commit | b33bd0cbfa102b8f87702338aa72742fe3c7f220 (patch) | |
tree | 0a57d3b48b92f125c028b26b1b922c96f2c30c05 /drivers/infiniband | |
parent | RDMA/cxgb4: Use the BAR2/WC path for kernel QPs and T5 devices (diff) | |
download | linux-b33bd0cbfa102b8f87702338aa72742fe3c7f220.tar.xz linux-b33bd0cbfa102b8f87702338aa72742fe3c7f220.zip |
RDMA/cxgb4: Endpoint timeout fixes
1) timedout endpoint processing can be starved. If there are continual
CPL messages flowing into the driver, the endpoint timeout
processing can be starved. This condition exposed the other bugs
below.
Solution: In process_work(), call process_timedout_eps() after each CPL
is processed.
2) Connection events can be processed even though the endpoint is on
the timeout list. If the endpoint is scheduled for timeout
processing, then we must ignore MPA Start Requests and Replies.
Solution: Change stop_ep_timer() to return 1 if the ep has already been
queued for timeout processing. All the callers of stop_ep_timer() need
to check this and act accordingly. There are just a few cases where
the caller needs to do something different if stop_ep_timer() returns 1:
1) in process_mpa_reply(), ignore the reply and process_timeout()
will abort the connection.
2) in process_mpa_request, ignore the request and process_timeout()
will abort the connection.
It is ok for callers of stop_ep_timer() to abort the connection since
that will leave the state in ABORTING or DEAD, and process_timeout()
now ignores timeouts when the ep is in these states.
3) Double insertion on the timeout list. Since the endpoint timers
are used for connection setup and teardown, we need to guard
against the possibility that an endpoint is already on the timeout
list. This is a rare condition and only seen under heavy load and
in the presense of the above 2 bugs.
Solution: In ep_timeout(), don't queue the endpoint if it is already on
the queue.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/hw/cxgb4/cm.c | 89 |
1 files changed, 56 insertions, 33 deletions
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 02436d5d0dab..185452abf32c 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -173,12 +173,15 @@ static void start_ep_timer(struct c4iw_ep *ep) add_timer(&ep->timer); } -static void stop_ep_timer(struct c4iw_ep *ep) +static int stop_ep_timer(struct c4iw_ep *ep) { PDBG("%s ep %p stopping\n", __func__, ep); del_timer_sync(&ep->timer); - if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) + if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { c4iw_put_ep(&ep->com); + return 0; + } + return 1; } static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, @@ -1165,12 +1168,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); /* - * Stop mpa timer. If it expired, then the state has - * changed and we bail since ep_timeout already aborted - * the connection. + * Stop mpa timer. If it expired, then + * we ignore the MPA reply. process_timeout() + * will abort the connection. */ - stop_ep_timer(ep); - if (ep->com.state != MPA_REQ_SENT) + if (stop_ep_timer(ep)) return; /* @@ -1375,15 +1377,12 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (ep->com.state != MPA_REQ_WAIT) - return; - /* * If we get more than the supported amount of private data * then we must fail this connection. */ if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { - stop_ep_timer(ep); + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1413,13 +1412,13 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) if (mpa->revision > mpa_rev) { printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," " Received = %d\n", __func__, mpa_rev, mpa->revision); - stop_ep_timer(ep); + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { - stop_ep_timer(ep); + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1430,7 +1429,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) * Fail if there's too much private data. */ if (plen > MPA_MAX_PRIVATE_DATA) { - stop_ep_timer(ep); + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1439,7 +1438,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) * If plen does not account for pkt size */ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { - stop_ep_timer(ep); + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1496,18 +1495,24 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, ep->mpa_attr.p2p_type); - __state_set(&ep->com, MPA_REQ_RCVD); - stop_ep_timer(ep); - - /* drive upcall */ - mutex_lock(&ep->parent_ep->com.mutex); - if (ep->parent_ep->com.state != DEAD) { - if (connect_request_upcall(ep)) + /* + * If the endpoint timer already expired, then we ignore + * the start request. process_timeout() will abort + * the connection. + */ + if (!stop_ep_timer(ep)) { + __state_set(&ep->com, MPA_REQ_RCVD); + + /* drive upcall */ + mutex_lock(&ep->parent_ep->com.mutex); + if (ep->parent_ep->com.state != DEAD) { + if (connect_request_upcall(ep)) + abort_connection(ep, skb, GFP_KERNEL); + } else { abort_connection(ep, skb, GFP_KERNEL); - } else { - abort_connection(ep, skb, GFP_KERNEL); + } + mutex_unlock(&ep->parent_ep->com.mutex); } - mutex_unlock(&ep->parent_ep->com.mutex); return; } @@ -2265,7 +2270,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) disconnect = 0; break; case MORIBUND: - stop_ep_timer(ep); + (void)stop_ep_timer(ep); if (ep->com.cm_id && ep->com.qp) { attrs.next_state = C4IW_QP_STATE_IDLE; c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, @@ -2325,10 +2330,10 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) case CONNECTING: break; case MPA_REQ_WAIT: - stop_ep_timer(ep); + (void)stop_ep_timer(ep); break; case MPA_REQ_SENT: - stop_ep_timer(ep); + (void)stop_ep_timer(ep); if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1)) connect_reply_upcall(ep, -ECONNRESET); else { @@ -2433,7 +2438,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) __state_set(&ep->com, MORIBUND); break; case MORIBUND: - stop_ep_timer(ep); + (void)stop_ep_timer(ep); if ((ep->com.cm_id) && (ep->com.qp)) { attrs.next_state = C4IW_QP_STATE_IDLE; c4iw_modify_qp(ep->com.qp->rhp, @@ -3028,7 +3033,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { close = 1; if (abrupt) { - stop_ep_timer(ep); + (void)stop_ep_timer(ep); ep->com.state = ABORTING; } else ep->com.state = MORIBUND; @@ -3462,6 +3467,16 @@ static void process_timeout(struct c4iw_ep *ep) __state_set(&ep->com, ABORTING); close_complete_upcall(ep, -ETIMEDOUT); break; + case ABORTING: + case DEAD: + + /* + * These states are expected if the ep timed out at the same + * time as another thread was calling stop_ep_timer(). + * So we silently do nothing for these states. + */ + abort = 0; + break; default: WARN(1, "%s unexpected state ep %p tid %u state %u\n", __func__, ep, ep->hwtid, ep->com.state); @@ -3483,6 +3498,8 @@ static void process_timedout_eps(void) tmp = timeout_list.next; list_del(tmp); + tmp->next = NULL; + tmp->prev = NULL; spin_unlock_irq(&timeout_lock); ep = list_entry(tmp, struct c4iw_ep, entry); process_timeout(ep); @@ -3499,6 +3516,7 @@ static void process_work(struct work_struct *work) unsigned int opcode; int ret; + process_timedout_eps(); while ((skb = skb_dequeue(&rxq))) { rpl = cplhdr(skb); dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); @@ -3508,8 +3526,8 @@ static void process_work(struct work_struct *work) ret = work_handlers[opcode](dev, skb); if (!ret) kfree_skb(skb); + process_timedout_eps(); } - process_timedout_eps(); } static DECLARE_WORK(skb_work, process_work); @@ -3521,8 +3539,13 @@ static void ep_timeout(unsigned long arg) spin_lock(&timeout_lock); if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { - list_add_tail(&ep->entry, &timeout_list); - kickit = 1; + /* + * Only insert if it is not already on the list. + */ + if (!ep->entry.next) { + list_add_tail(&ep->entry, &timeout_list); + kickit = 1; + } } spin_unlock(&timeout_lock); if (kickit) |