diff options
Diffstat (limited to 'drivers/infiniband/hw/cxgb4')
-rw-r--r-- | drivers/infiniband/hw/cxgb4/Kconfig | 6 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/cm.c | 242 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/cq.c | 55 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/device.c | 220 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 14 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/mem.c | 61 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/provider.c | 47 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/qp.c | 217 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/resource.c | 10 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/t4.h | 78 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 14 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/user.h | 5 |
12 files changed, 638 insertions, 331 deletions
diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig index d4e8983fba53..23f38cf2c5cd 100644 --- a/drivers/infiniband/hw/cxgb4/Kconfig +++ b/drivers/infiniband/hw/cxgb4/Kconfig @@ -1,10 +1,10 @@ config INFINIBAND_CXGB4 - tristate "Chelsio T4 RDMA Driver" + tristate "Chelsio T4/T5 RDMA Driver" depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n) select GENERIC_ALLOCATOR ---help--- - This is an iWARP/RDMA driver for the Chelsio T4 1GbE and - 10GbE adapters. + This is an iWARP/RDMA driver for the Chelsio T4 and T5 + 1GbE, 10GbE adapters and T5 40GbE adapter. For general information about Chelsio and our products, visit our website at <http://www.chelsio.com>. diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index d286bdebe2ab..1f863a96a480 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -98,9 +98,9 @@ int c4iw_debug; module_param(c4iw_debug, int, 0644); MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)"); -static int peer2peer; +static int peer2peer = 1; module_param(peer2peer, int, 0644); -MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); +MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)"); static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; module_param(p2p_type, int, 0644); @@ -173,12 +173,15 @@ static void start_ep_timer(struct c4iw_ep *ep) add_timer(&ep->timer); } -static void stop_ep_timer(struct c4iw_ep *ep) +static int stop_ep_timer(struct c4iw_ep *ep) { PDBG("%s ep %p stopping\n", __func__, ep); del_timer_sync(&ep->timer); - if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) + if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { c4iw_put_ep(&ep->com); + return 0; + } + return 1; } static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, @@ -400,7 +403,8 @@ static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip, n = dst_neigh_lookup(&rt->dst, &peer_ip); if (!n) return NULL; - if (!our_interface(dev, n->dev)) { + if (!our_interface(dev, n->dev) && + !(n->dev->flags & IFF_LOOPBACK)) { dst_release(&rt->dst); return NULL; } @@ -583,6 +587,10 @@ static int send_connect(struct c4iw_ep *ep) opt2 |= SACK_EN(1); if (wscale && enable_tcp_window_scaling) opt2 |= WND_SCALE_EN(1); + if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + opt2 |= T5_OPT_2_VALID; + opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); + } t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure); if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { @@ -759,8 +767,9 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, ep->mpa_skb = skb; c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); start_ep_timer(ep); - state_set(&ep->com, MPA_REQ_SENT); + __state_set(&ep->com, MPA_REQ_SENT); ep->mpa_attr.initiator = 1; + ep->snd_seq += mpalen; return; } @@ -840,6 +849,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) t4_set_arp_err_handler(skb, NULL, arp_failure_discard); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; + ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -923,7 +933,8 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) skb_get(skb); t4_set_arp_err_handler(skb, NULL, arp_failure_discard); ep->mpa_skb = skb; - state_set(&ep->com, MPA_REP_SENT); + __state_set(&ep->com, MPA_REP_SENT); + ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -940,6 +951,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid, be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn)); + mutex_lock(&ep->com.mutex); dst_confirm(ep->dst); /* setup the hwtid for this connection */ @@ -963,17 +975,18 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) send_mpa_req(ep, skb, 1); else send_mpa_req(ep, skb, mpa_rev); - + mutex_unlock(&ep->com.mutex); return 0; } -static void close_complete_upcall(struct c4iw_ep *ep) +static void close_complete_upcall(struct c4iw_ep *ep, int status) { struct iw_cm_event event; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CLOSE; + event.status = status; if (ep->com.cm_id) { PDBG("close complete delivered ep %p cm_id %p tid %u\n", ep, ep->com.cm_id, ep->hwtid); @@ -987,8 +1000,7 @@ static void close_complete_upcall(struct c4iw_ep *ep) static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) { PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - close_complete_upcall(ep); - state_set(&ep->com, ABORTING); + __state_set(&ep->com, ABORTING); set_bit(ABORT_CONN, &ep->com.history); return send_abort(ep, skb, gfp); } @@ -1066,9 +1078,10 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) } } -static void connect_request_upcall(struct c4iw_ep *ep) +static int connect_request_upcall(struct c4iw_ep *ep) { struct iw_cm_event event; + int ret; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); memset(&event, 0, sizeof(event)); @@ -1093,15 +1106,14 @@ static void connect_request_upcall(struct c4iw_ep *ep) event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); } - if (state_read(&ep->parent_ep->com) != DEAD) { - c4iw_get_ep(&ep->com); - ep->parent_ep->com.cm_id->event_handler( - ep->parent_ep->com.cm_id, - &event); - } + c4iw_get_ep(&ep->com); + ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id, + &event); + if (ret) + c4iw_put_ep(&ep->com); set_bit(CONNREQ_UPCALL, &ep->com.history); c4iw_put_ep(&ep->parent_ep->com); - ep->parent_ep = NULL; + return ret; } static void established_upcall(struct c4iw_ep *ep) @@ -1146,7 +1158,7 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) return credits; } -static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) +static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) { struct mpa_message *mpa; struct mpa_v2_conn_params *mpa_v2_params; @@ -1156,17 +1168,17 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) struct c4iw_qp_attributes attrs; enum c4iw_qp_attr_mask mask; int err; + int disconnect = 0; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); /* - * Stop mpa timer. If it expired, then the state has - * changed and we bail since ep_timeout already aborted - * the connection. + * Stop mpa timer. If it expired, then + * we ignore the MPA reply. process_timeout() + * will abort the connection. */ - stop_ep_timer(ep); - if (state_read(&ep->com) != MPA_REQ_SENT) - return; + if (stop_ep_timer(ep)) + return 0; /* * If we get more than the supported amount of private data @@ -1188,7 +1200,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * if we don't even have the mpa message, then bail. */ if (ep->mpa_pkt_len < sizeof(*mpa)) - return; + return 0; mpa = (struct mpa_message *) ep->mpa_pkt; /* Validate MPA header. */ @@ -1228,7 +1240,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * We'll continue process when more data arrives. */ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) - return; + return 0; if (mpa->flags & MPA_REJECT) { err = -ECONNREFUSED; @@ -1240,7 +1252,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * start reply message including private data. And * the MPA header is valid. */ - state_set(&ep->com, FPDU_MODE); + __state_set(&ep->com, FPDU_MODE); ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; @@ -1330,9 +1342,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) attrs.layer_etype = LAYER_MPA | DDP_LLP; attrs.ecode = MPA_NOMATCH_RTR; attrs.next_state = C4IW_QP_STATE_TERMINATE; + attrs.send_term = 1; err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); err = -ENOMEM; + disconnect = 1; goto out; } @@ -1348,18 +1362,20 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) attrs.layer_etype = LAYER_MPA | DDP_LLP; attrs.ecode = MPA_INSUFF_IRD; attrs.next_state = C4IW_QP_STATE_TERMINATE; + attrs.send_term = 1; err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); err = -ENOMEM; + disconnect = 1; goto out; } goto out; err: - state_set(&ep->com, ABORTING); + __state_set(&ep->com, ABORTING); send_abort(ep, skb, GFP_KERNEL); out: connect_reply_upcall(ep, err); - return; + return disconnect; } static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) @@ -1370,15 +1386,12 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) != MPA_REQ_WAIT) - return; - /* * If we get more than the supported amount of private data * then we must fail this connection. */ if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { - stop_ep_timer(ep); + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1400,7 +1413,6 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) return; PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); - stop_ep_timer(ep); mpa = (struct mpa_message *) ep->mpa_pkt; /* @@ -1409,13 +1421,13 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) if (mpa->revision > mpa_rev) { printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," " Received = %d\n", __func__, mpa_rev, mpa->revision); - stop_ep_timer(ep); + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { - stop_ep_timer(ep); + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1426,7 +1438,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) * Fail if there's too much private data. */ if (plen > MPA_MAX_PRIVATE_DATA) { - stop_ep_timer(ep); + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1435,7 +1447,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) * If plen does not account for pkt size */ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { - stop_ep_timer(ep); + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1492,10 +1504,24 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, ep->mpa_attr.p2p_type); - state_set(&ep->com, MPA_REQ_RCVD); - - /* drive upcall */ - connect_request_upcall(ep); + /* + * If the endpoint timer already expired, then we ignore + * the start request. process_timeout() will abort + * the connection. + */ + if (!stop_ep_timer(ep)) { + __state_set(&ep->com, MPA_REQ_RCVD); + + /* drive upcall */ + mutex_lock(&ep->parent_ep->com.mutex); + if (ep->parent_ep->com.state != DEAD) { + if (connect_request_upcall(ep)) + abort_connection(ep, skb, GFP_KERNEL); + } else { + abort_connection(ep, skb, GFP_KERNEL); + } + mutex_unlock(&ep->parent_ep->com.mutex); + } return; } @@ -1507,19 +1533,23 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int tid = GET_TID(hdr); struct tid_info *t = dev->rdev.lldi.tids; __u8 status = hdr->status; + int disconnect = 0; ep = lookup_tid(t, tid); + if (!ep) + return 0; PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); skb_pull(skb, sizeof(*hdr)); skb_trim(skb, dlen); + mutex_lock(&ep->com.mutex); /* update RX credits */ update_rx_credits(ep, dlen); - switch (state_read(&ep->com)) { + switch (ep->com.state) { case MPA_REQ_SENT: ep->rcv_seq += dlen; - process_mpa_reply(ep, skb); + disconnect = process_mpa_reply(ep, skb); break; case MPA_REQ_WAIT: ep->rcv_seq += dlen; @@ -1532,15 +1562,19 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) pr_err("%s Unexpected streaming data." \ " qpid %u ep %p state %d tid %u status %d\n", __func__, ep->com.qp->wq.sq.qid, ep, - state_read(&ep->com), ep->hwtid, status); + ep->com.state, ep->hwtid, status); attrs.next_state = C4IW_QP_STATE_TERMINATE; c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + disconnect = 1; break; } default: break; } + mutex_unlock(&ep->com.mutex); + if (disconnect) + c4iw_ep_disconnect(ep, 0, GFP_KERNEL); return 0; } @@ -1647,6 +1681,15 @@ static inline int act_open_has_tid(int status) status != CPL_ERR_ARP_MISS; } +/* Returns whether a CPL status conveys negative advice. + */ +static int is_neg_adv(unsigned int status) +{ + return status == CPL_ERR_RTX_NEG_ADVICE || + status == CPL_ERR_PERSIST_NEG_ADVICE || + status == CPL_ERR_KEEPALV_NEG_ADVICE; +} + #define ACT_OPEN_RETRY_COUNT 2 static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, @@ -1835,7 +1878,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid, status, status2errno(status)); - if (status == CPL_ERR_RTX_NEG_ADVICE) { + if (is_neg_adv(status)) { printk(KERN_WARNING MOD "Connection problems for atid %u\n", atid); return 0; @@ -1979,6 +2022,10 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, if (tcph->ece && tcph->cwr) opt2 |= CCTRL_ECN(1); } + if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + opt2 |= T5_OPT_2_VALID; + opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); + } rpl = cplhdr(skb); INIT_TP_WR(rpl, ep->hwtid); @@ -2240,13 +2287,13 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) disconnect = 0; break; case MORIBUND: - stop_ep_timer(ep); + (void)stop_ep_timer(ep); if (ep->com.cm_id && ep->com.qp) { attrs.next_state = C4IW_QP_STATE_IDLE; c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } - close_complete_upcall(ep); + close_complete_upcall(ep, 0); __state_set(&ep->com, DEAD); release = 1; disconnect = 0; @@ -2265,15 +2312,6 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } -/* - * Returns whether an ABORT_REQ_RSS message is a negative advice. - */ -static int is_neg_adv_abort(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE; -} - static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_abort_req_rss *req = cplhdr(skb); @@ -2287,7 +2325,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int tid = GET_TID(req); ep = lookup_tid(t, tid); - if (is_neg_adv_abort(req->status)) { + if (is_neg_adv(req->status)) { PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep, ep->hwtid); return 0; @@ -2309,10 +2347,10 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) case CONNECTING: break; case MPA_REQ_WAIT: - stop_ep_timer(ep); + (void)stop_ep_timer(ep); break; case MPA_REQ_SENT: - stop_ep_timer(ep); + (void)stop_ep_timer(ep); if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1)) connect_reply_upcall(ep, -ECONNRESET); else { @@ -2417,7 +2455,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) __state_set(&ep->com, MORIBUND); break; case MORIBUND: - stop_ep_timer(ep); + (void)stop_ep_timer(ep); if ((ep->com.cm_id) && (ep->com.qp)) { attrs.next_state = C4IW_QP_STATE_IDLE; c4iw_modify_qp(ep->com.qp->rhp, @@ -2425,7 +2463,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } - close_complete_upcall(ep); + close_complete_upcall(ep, 0); __state_set(&ep->com, DEAD); release = 1; break; @@ -2500,22 +2538,28 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { - int err; + int err = 0; + int disconnect = 0; struct c4iw_ep *ep = to_ep(cm_id); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { + mutex_lock(&ep->com.mutex); + if (ep->com.state == DEAD) { + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return -ECONNRESET; } set_bit(ULP_REJECT, &ep->com.history); - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); + BUG_ON(ep->com.state != MPA_REQ_RCVD); if (mpa_rev == 0) abort_connection(ep, NULL, GFP_KERNEL); else { err = send_mpa_reject(ep, pdata, pdata_len); - err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL); + disconnect = 1; } + mutex_unlock(&ep->com.mutex); + if (disconnect) + err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL); c4iw_put_ep(&ep->com); return 0; } @@ -2530,12 +2574,14 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { + + mutex_lock(&ep->com.mutex); + if (ep->com.state == DEAD) { err = -ECONNRESET; goto err; } - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); + BUG_ON(ep->com.state != MPA_REQ_RCVD); BUG_ON(!qp); set_bit(ULP_ACCEPT, &ep->com.history); @@ -2604,14 +2650,16 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (err) goto err1; - state_set(&ep->com, FPDU_MODE); + __state_set(&ep->com, FPDU_MODE); established_upcall(ep); + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return 0; err1: ep->com.cm_id = NULL; cm_id->rem_ref(cm_id); err: + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return err; } @@ -2980,7 +3028,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) rdev = &ep->com.dev->rdev; if (c4iw_fatal_error(rdev)) { fatal = 1; - close_complete_upcall(ep); + close_complete_upcall(ep, -EIO); ep->com.state = DEAD; } switch (ep->com.state) { @@ -3002,7 +3050,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { close = 1; if (abrupt) { - stop_ep_timer(ep); + (void)stop_ep_timer(ep); ep->com.state = ABORTING; } else ep->com.state = MORIBUND; @@ -3022,7 +3070,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (close) { if (abrupt) { set_bit(EP_DISC_ABORT, &ep->com.history); - close_complete_upcall(ep); + close_complete_upcall(ep, -ECONNRESET); ret = send_abort(ep, NULL, gfp); } else { set_bit(EP_DISC_CLOSE, &ep->com.history); @@ -3203,6 +3251,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, struct sk_buff *req_skb; struct fw_ofld_connection_wr *req; struct cpl_pass_accept_req *cpl = cplhdr(skb); + int ret; req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL); req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req)); @@ -3239,7 +3288,13 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, req->cookie = (unsigned long)skb; set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); - cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); + ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); + if (ret < 0) { + pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__, + ret); + kfree_skb(skb); + kfree_skb(req_skb); + } } /* @@ -3346,13 +3401,13 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) pi = (struct port_info *)netdev_priv(pdev); tx_chan = cxgb4_port_chan(pdev); } + neigh_release(neigh); if (!e) { pr_err("%s - failed to allocate l2t entry!\n", __func__); goto free_dst; } - neigh_release(neigh); step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step]; window = (__force u16) htons((__force u16)tcph->window); @@ -3427,15 +3482,26 @@ static void process_timeout(struct c4iw_ep *ep) &attrs, 1); } __state_set(&ep->com, ABORTING); + close_complete_upcall(ep, -ETIMEDOUT); + break; + case ABORTING: + case DEAD: + + /* + * These states are expected if the ep timed out at the same + * time as another thread was calling stop_ep_timer(). + * So we silently do nothing for these states. + */ + abort = 0; break; default: WARN(1, "%s unexpected state ep %p tid %u state %u\n", __func__, ep, ep->hwtid, ep->com.state); abort = 0; } - mutex_unlock(&ep->com.mutex); if (abort) abort_connection(ep, NULL, GFP_KERNEL); + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); } @@ -3449,6 +3515,8 @@ static void process_timedout_eps(void) tmp = timeout_list.next; list_del(tmp); + tmp->next = NULL; + tmp->prev = NULL; spin_unlock_irq(&timeout_lock); ep = list_entry(tmp, struct c4iw_ep, entry); process_timeout(ep); @@ -3465,6 +3533,7 @@ static void process_work(struct work_struct *work) unsigned int opcode; int ret; + process_timedout_eps(); while ((skb = skb_dequeue(&rxq))) { rpl = cplhdr(skb); dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); @@ -3474,8 +3543,8 @@ static void process_work(struct work_struct *work) ret = work_handlers[opcode](dev, skb); if (!ret) kfree_skb(skb); + process_timedout_eps(); } - process_timedout_eps(); } static DECLARE_WORK(skb_work, process_work); @@ -3487,8 +3556,13 @@ static void ep_timeout(unsigned long arg) spin_lock(&timeout_lock); if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { - list_add_tail(&ep->entry, &timeout_list); - kickit = 1; + /* + * Only insert if it is not already on the list. + */ + if (!ep->entry.next) { + list_add_tail(&ep->entry, &timeout_list); + kickit = 1; + } } spin_unlock(&timeout_lock); if (kickit) @@ -3570,7 +3644,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) kfree_skb(skb); return 0; } - if (is_neg_adv_abort(req->status)) { + if (is_neg_adv(req->status)) { PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep, ep->hwtid); kfree_skb(skb); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 88de3aa9c5b0..cfaa56ada189 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -235,27 +235,21 @@ int c4iw_flush_sq(struct c4iw_qp *qhp) struct t4_cq *cq = &chp->cq; int idx; struct t4_swsqe *swsqe; - int error = (qhp->attr.state != C4IW_QP_STATE_CLOSING && - qhp->attr.state != C4IW_QP_STATE_IDLE); if (wq->sq.flush_cidx == -1) wq->sq.flush_cidx = wq->sq.cidx; idx = wq->sq.flush_cidx; BUG_ON(idx >= wq->sq.size); while (idx != wq->sq.pidx) { - if (error) { - swsqe = &wq->sq.sw_sq[idx]; - BUG_ON(swsqe->flushed); - swsqe->flushed = 1; - insert_sq_cqe(wq, cq, swsqe); - if (wq->sq.oldest_read == swsqe) { - BUG_ON(swsqe->opcode != FW_RI_READ_REQ); - advance_oldest_read(wq); - } - flushed++; - } else { - t4_sq_consume(wq); + swsqe = &wq->sq.sw_sq[idx]; + BUG_ON(swsqe->flushed); + swsqe->flushed = 1; + insert_sq_cqe(wq, cq, swsqe); + if (wq->sq.oldest_read == swsqe) { + BUG_ON(swsqe->opcode != FW_RI_READ_REQ); + advance_oldest_read(wq); } + flushed++; if (++idx == wq->sq.size) idx = 0; } @@ -365,8 +359,14 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp) if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) { - /* - * drop peer2peer RTR reads. + /* If we have reached here because of async + * event or other error, and have egress error + * then drop + */ + if (CQE_TYPE(hw_cqe) == 1) + goto next_cqe; + + /* drop peer2peer RTR reads. */ if (CQE_WRID_STAG(hw_cqe) == 1) goto next_cqe; @@ -511,8 +511,18 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, */ if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) { - /* - * If this is an unsolicited read response, then the read + /* If we have reached here because of async + * event or other error, and have egress error + * then drop + */ + if (CQE_TYPE(hw_cqe) == 1) { + if (CQE_STATUS(hw_cqe)) + t4_set_wq_in_error(wq); + ret = -EAGAIN; + goto skip_cqe; + } + + /* If this is an unsolicited read response, then the read * was generated by the kernel driver as part of peer-2-peer * connection setup. So ignore the completion. */ @@ -603,7 +613,7 @@ proc_cqe: */ if (SQ_TYPE(hw_cqe)) { int idx = CQE_WRID_SQ_IDX(hw_cqe); - BUG_ON(idx > wq->sq.size); + BUG_ON(idx >= wq->sq.size); /* * Account for any unsignaled completions completed by @@ -617,7 +627,7 @@ proc_cqe: wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx; else wq->sq.in_use -= idx - wq->sq.cidx; - BUG_ON(wq->sq.in_use < 0 && wq->sq.in_use < wq->sq.size); + BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size); wq->sq.cidx = (uint16_t)idx; PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx); @@ -662,7 +672,7 @@ skip_cqe: static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) { struct c4iw_qp *qhp = NULL; - struct t4_cqe cqe = {0, 0}, *rd_cqe; + struct t4_cqe uninitialized_var(cqe), *rd_cqe; struct t4_wq *wq; u32 credit = 0; u8 cqe_flushed; @@ -881,7 +891,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, /* * Make actual HW queue 2x to avoid cdix_inc overflows. */ - hwentries = entries * 2; + hwentries = min(entries * 2, T4_MAX_IQ_SIZE); /* * Make HW queue at least 64 entries so GTS updates aren't too @@ -930,6 +940,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, if (!mm2) goto err4; + memset(&uresp, 0, sizeof(uresp)); uresp.qid_mask = rhp->rdev.cqmask; uresp.cqid = chp->cq.cqid; uresp.size = chp->cq.size; diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 4a033853312e..f4fa50a609e2 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -64,6 +64,10 @@ struct uld_ctx { static LIST_HEAD(uld_ctx_list); static DEFINE_MUTEX(dev_mutex); +#define DB_FC_RESUME_SIZE 64 +#define DB_FC_RESUME_DELAY 1 +#define DB_FC_DRAIN_THRESH 0 + static struct dentry *c4iw_debugfs_root; struct c4iw_debugfs_data { @@ -282,7 +286,7 @@ static const struct file_operations stag_debugfs_fops = { .llseek = default_llseek, }; -static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY"}; +static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY", "STOPPED"}; static int stats_show(struct seq_file *seq, void *v) { @@ -311,9 +315,10 @@ static int stats_show(struct seq_file *seq, void *v) seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full); seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty); seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop); - seq_printf(seq, " DB State: %s Transitions %llu\n", + seq_printf(seq, " DB State: %s Transitions %llu FC Interruptions %llu\n", db_state_str[dev->db_state], - dev->rdev.stats.db_state_transitions); + dev->rdev.stats.db_state_transitions, + dev->rdev.stats.db_fc_interruptions); seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full); seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n", dev->rdev.stats.act_ofld_conn_fails); @@ -643,6 +648,12 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err); goto err4; } + rdev->status_page = (struct t4_dev_status_page *) + __get_free_page(GFP_KERNEL); + if (!rdev->status_page) { + pr_err(MOD "error allocating status page\n"); + goto err4; + } return 0; err4: c4iw_rqtpool_destroy(rdev); @@ -656,6 +667,7 @@ err1: static void c4iw_rdev_close(struct c4iw_rdev *rdev) { + free_page((unsigned long)rdev->status_page); c4iw_pblpool_destroy(rdev); c4iw_rqtpool_destroy(rdev); c4iw_destroy_resource(&rdev->resource); @@ -670,7 +682,10 @@ static void c4iw_dealloc(struct uld_ctx *ctx) idr_destroy(&ctx->dev->hwtid_idr); idr_destroy(&ctx->dev->stid_idr); idr_destroy(&ctx->dev->atid_idr); - iounmap(ctx->dev->rdev.oc_mw_kva); + if (ctx->dev->rdev.bar2_kva) + iounmap(ctx->dev->rdev.bar2_kva); + if (ctx->dev->rdev.oc_mw_kva) + iounmap(ctx->dev->rdev.oc_mw_kva); ib_dealloc_device(&ctx->dev->ibdev); ctx->dev = NULL; } @@ -703,18 +718,6 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) pr_info("%s: On-Chip Queues not supported on this device.\n", pci_name(infop->pdev)); - if (!is_t4(infop->adapter_type)) { - if (!allow_db_fc_on_t5) { - db_fc_threshold = 100000; - pr_info("DB Flow Control Disabled.\n"); - } - - if (!allow_db_coalescing_on_t5) { - db_coalescing_threshold = -1; - pr_info("DB Coalescing Disabled.\n"); - } - } - devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp)); if (!devp) { printk(KERN_ERR MOD "Cannot allocate ib device\n"); @@ -722,11 +725,31 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) } devp->rdev.lldi = *infop; - devp->rdev.oc_mw_pa = pci_resource_start(devp->rdev.lldi.pdev, 2) + - (pci_resource_len(devp->rdev.lldi.pdev, 2) - - roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size)); - devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa, - devp->rdev.lldi.vr->ocq.size); + /* + * For T5 devices, we map all of BAR2 with WC. + * For T4 devices with onchip qp mem, we map only that part + * of BAR2 with WC. + */ + devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2); + if (is_t5(devp->rdev.lldi.adapter_type)) { + devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa, + pci_resource_len(devp->rdev.lldi.pdev, 2)); + if (!devp->rdev.bar2_kva) { + pr_err(MOD "Unable to ioremap BAR2\n"); + return ERR_PTR(-EINVAL); + } + } else if (ocqp_supported(infop)) { + devp->rdev.oc_mw_pa = + pci_resource_start(devp->rdev.lldi.pdev, 2) + + pci_resource_len(devp->rdev.lldi.pdev, 2) - + roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size); + devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa, + devp->rdev.lldi.vr->ocq.size); + if (!devp->rdev.oc_mw_kva) { + pr_err(MOD "Unable to ioremap onchip mem\n"); + return ERR_PTR(-EINVAL); + } + } PDBG(KERN_INFO MOD "ocq memory: " "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n", @@ -749,6 +772,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) spin_lock_init(&devp->lock); mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); + INIT_LIST_HEAD(&devp->db_fc_list); if (c4iw_debugfs_root) { devp->debugfs_root = debugfs_create_dir( @@ -897,11 +921,13 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, } opcode = *(u8 *)rsp; - if (c4iw_handlers[opcode]) + if (c4iw_handlers[opcode]) { c4iw_handlers[opcode](dev, skb); - else + } else { pr_info("%s no handler opcode 0x%x...\n", __func__, opcode); + kfree_skb(skb); + } return 0; nomem: @@ -977,13 +1003,16 @@ static int disable_qp_db(int id, void *p, void *data) static void stop_queues(struct uld_ctx *ctx) { - spin_lock_irq(&ctx->dev->lock); - if (ctx->dev->db_state == NORMAL) { - ctx->dev->rdev.stats.db_state_transitions++; - ctx->dev->db_state = FLOW_CONTROL; + unsigned long flags; + + spin_lock_irqsave(&ctx->dev->lock, flags); + ctx->dev->rdev.stats.db_state_transitions++; + ctx->dev->db_state = STOPPED; + if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL); - } - spin_unlock_irq(&ctx->dev->lock); + else + ctx->dev->rdev.status_page->db_off = 1; + spin_unlock_irqrestore(&ctx->dev->lock, flags); } static int enable_qp_db(int id, void *p, void *data) @@ -994,15 +1023,72 @@ static int enable_qp_db(int id, void *p, void *data) return 0; } +static void resume_rc_qp(struct c4iw_qp *qp) +{ + spin_lock(&qp->lock); + t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, + is_t5(qp->rhp->rdev.lldi.adapter_type), NULL); + qp->wq.sq.wq_pidx_inc = 0; + t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, + is_t5(qp->rhp->rdev.lldi.adapter_type), NULL); + qp->wq.rq.wq_pidx_inc = 0; + spin_unlock(&qp->lock); +} + +static void resume_a_chunk(struct uld_ctx *ctx) +{ + int i; + struct c4iw_qp *qp; + + for (i = 0; i < DB_FC_RESUME_SIZE; i++) { + qp = list_first_entry(&ctx->dev->db_fc_list, struct c4iw_qp, + db_fc_entry); + list_del_init(&qp->db_fc_entry); + resume_rc_qp(qp); + if (list_empty(&ctx->dev->db_fc_list)) + break; + } +} + static void resume_queues(struct uld_ctx *ctx) { spin_lock_irq(&ctx->dev->lock); - if (ctx->dev->qpcnt <= db_fc_threshold && - ctx->dev->db_state == FLOW_CONTROL) { - ctx->dev->db_state = NORMAL; - ctx->dev->rdev.stats.db_state_transitions++; - idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL); + if (ctx->dev->db_state != STOPPED) + goto out; + ctx->dev->db_state = FLOW_CONTROL; + while (1) { + if (list_empty(&ctx->dev->db_fc_list)) { + WARN_ON(ctx->dev->db_state != FLOW_CONTROL); + ctx->dev->db_state = NORMAL; + ctx->dev->rdev.stats.db_state_transitions++; + if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) { + idr_for_each(&ctx->dev->qpidr, enable_qp_db, + NULL); + } else { + ctx->dev->rdev.status_page->db_off = 0; + } + break; + } else { + if (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) + < (ctx->dev->rdev.lldi.dbfifo_int_thresh << + DB_FC_DRAIN_THRESH)) { + resume_a_chunk(ctx); + } + if (!list_empty(&ctx->dev->db_fc_list)) { + spin_unlock_irq(&ctx->dev->lock); + if (DB_FC_RESUME_DELAY) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(DB_FC_RESUME_DELAY); + } + spin_lock_irq(&ctx->dev->lock); + if (ctx->dev->db_state != FLOW_CONTROL) + break; + } + } } +out: + if (ctx->dev->db_state != NORMAL) + ctx->dev->rdev.stats.db_fc_interruptions++; spin_unlock_irq(&ctx->dev->lock); } @@ -1028,12 +1114,12 @@ static int count_qps(int id, void *p, void *data) return 0; } -static void deref_qps(struct qp_list qp_list) +static void deref_qps(struct qp_list *qp_list) { int idx; - for (idx = 0; idx < qp_list.idx; idx++) - c4iw_qp_rem_ref(&qp_list.qps[idx]->ibqp); + for (idx = 0; idx < qp_list->idx; idx++) + c4iw_qp_rem_ref(&qp_list->qps[idx]->ibqp); } static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) @@ -1044,17 +1130,22 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) for (idx = 0; idx < qp_list->idx; idx++) { struct c4iw_qp *qp = qp_list->qps[idx]; + spin_lock_irq(&qp->rhp->lock); + spin_lock(&qp->lock); ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0], qp->wq.sq.qid, t4_sq_host_wq_pidx(&qp->wq), t4_sq_wq_size(&qp->wq)); if (ret) { - printk(KERN_ERR MOD "%s: Fatal error - " + pr_err(KERN_ERR MOD "%s: Fatal error - " "DB overflow recovery failed - " "error syncing SQ qid %u\n", pci_name(ctx->lldi.pdev), qp->wq.sq.qid); + spin_unlock(&qp->lock); + spin_unlock_irq(&qp->rhp->lock); return; } + qp->wq.sq.wq_pidx_inc = 0; ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0], qp->wq.rq.qid, @@ -1062,12 +1153,17 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) t4_rq_wq_size(&qp->wq)); if (ret) { - printk(KERN_ERR MOD "%s: Fatal error - " + pr_err(KERN_ERR MOD "%s: Fatal error - " "DB overflow recovery failed - " "error syncing RQ qid %u\n", pci_name(ctx->lldi.pdev), qp->wq.rq.qid); + spin_unlock(&qp->lock); + spin_unlock_irq(&qp->rhp->lock); return; } + qp->wq.rq.wq_pidx_inc = 0; + spin_unlock(&qp->lock); + spin_unlock_irq(&qp->rhp->lock); /* Wait for the dbfifo to drain */ while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) { @@ -1083,36 +1179,22 @@ static void recover_queues(struct uld_ctx *ctx) struct qp_list qp_list; int ret; - /* lock out kernel db ringers */ - mutex_lock(&ctx->dev->db_mutex); - - /* put all queues in to recovery mode */ - spin_lock_irq(&ctx->dev->lock); - ctx->dev->db_state = RECOVERY; - ctx->dev->rdev.stats.db_state_transitions++; - idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL); - spin_unlock_irq(&ctx->dev->lock); - /* slow everybody down */ set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(usecs_to_jiffies(1000)); - /* Wait for the dbfifo to completely drain. */ - while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(usecs_to_jiffies(10)); - } - /* flush the SGE contexts */ ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]); if (ret) { printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n", pci_name(ctx->lldi.pdev)); - goto out; + return; } /* Count active queues so we can build a list of queues to recover */ spin_lock_irq(&ctx->dev->lock); + WARN_ON(ctx->dev->db_state != STOPPED); + ctx->dev->db_state = RECOVERY; idr_for_each(&ctx->dev->qpidr, count_qps, &count); qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC); @@ -1120,7 +1202,7 @@ static void recover_queues(struct uld_ctx *ctx) printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n", pci_name(ctx->lldi.pdev)); spin_unlock_irq(&ctx->dev->lock); - goto out; + return; } qp_list.idx = 0; @@ -1133,29 +1215,13 @@ static void recover_queues(struct uld_ctx *ctx) recover_lost_dbs(ctx, &qp_list); /* we're almost done! deref the qps and clean up */ - deref_qps(qp_list); + deref_qps(&qp_list); kfree(qp_list.qps); - /* Wait for the dbfifo to completely drain again */ - while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(usecs_to_jiffies(10)); - } - - /* resume the queues */ spin_lock_irq(&ctx->dev->lock); - if (ctx->dev->qpcnt > db_fc_threshold) - ctx->dev->db_state = FLOW_CONTROL; - else { - ctx->dev->db_state = NORMAL; - idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL); - } - ctx->dev->rdev.stats.db_state_transitions++; + WARN_ON(ctx->dev->db_state != RECOVERY); + ctx->dev->db_state = STOPPED; spin_unlock_irq(&ctx->dev->lock); - -out: - /* start up kernel db ringers again */ - mutex_unlock(&ctx->dev->db_mutex); } static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) @@ -1165,9 +1231,7 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) switch (control) { case CXGB4_CONTROL_DB_FULL: stop_queues(ctx); - mutex_lock(&ctx->dev->rdev.stats.lock); ctx->dev->rdev.stats.db_full++; - mutex_unlock(&ctx->dev->rdev.stats.lock); break; case CXGB4_CONTROL_DB_EMPTY: resume_queues(ctx); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 23eaeabab93b..7474b490760a 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -109,6 +109,7 @@ struct c4iw_dev_ucontext { enum c4iw_rdev_flags { T4_FATAL_ERROR = (1<<0), + T4_STATUS_PAGE_DISABLED = (1<<1), }; struct c4iw_stat { @@ -130,6 +131,7 @@ struct c4iw_stats { u64 db_empty; u64 db_drop; u64 db_state_transitions; + u64 db_fc_interruptions; u64 tcam_full; u64 act_ofld_conn_fails; u64 pas_ofld_conn_fails; @@ -147,9 +149,12 @@ struct c4iw_rdev { struct gen_pool *ocqp_pool; u32 flags; struct cxgb4_lld_info lldi; + unsigned long bar2_pa; + void __iomem *bar2_kva; unsigned long oc_mw_pa; void __iomem *oc_mw_kva; struct c4iw_stats stats; + struct t4_dev_status_page *status_page; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) @@ -211,7 +216,8 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, enum db_state { NORMAL = 0, FLOW_CONTROL = 1, - RECOVERY = 2 + RECOVERY = 2, + STOPPED = 3 }; struct c4iw_dev { @@ -225,10 +231,10 @@ struct c4iw_dev { struct mutex db_mutex; struct dentry *debugfs_root; enum db_state db_state; - int qpcnt; struct idr hwtid_idr; struct idr atid_idr; struct idr stid_idr; + struct list_head db_fc_list; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) @@ -369,6 +375,7 @@ struct c4iw_fr_page_list { DEFINE_DMA_UNMAP_ADDR(mapping); dma_addr_t dma_addr; struct c4iw_dev *dev; + int pll_len; }; static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list( @@ -428,10 +435,12 @@ struct c4iw_qp_attributes { u8 ecode; u16 sq_db_inc; u16 rq_db_inc; + u8 send_term; }; struct c4iw_qp { struct ib_qp ibqp; + struct list_head db_fc_entry; struct c4iw_dev *rhp; struct c4iw_ep *ep; struct c4iw_qp_attributes attr; @@ -441,6 +450,7 @@ struct c4iw_qp { atomic_t refcnt; wait_queue_head_t wait; struct timer_list timer; + int sq_sig_all; }; static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 41b11951a30a..ec7a2988a703 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -37,9 +37,9 @@ #include "iw_cxgb4.h" -int use_dsgl = 1; +int use_dsgl = 0; module_param(use_dsgl, int, 0644); -MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=1)"); +MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=0)"); #define T4_ULPTX_MIN_IO 32 #define C4IW_MAX_INLINE_SIZE 96 @@ -259,8 +259,12 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) { stag_idx = c4iw_get_resource(&rdev->resource.tpt_table); - if (!stag_idx) + if (!stag_idx) { + mutex_lock(&rdev->stats.lock); + rdev->stats.stag.fail++; + mutex_unlock(&rdev->stats.lock); return -ENOMEM; + } mutex_lock(&rdev->stats.lock); rdev->stats.stag.cur += 32; if (rdev->stats.stag.cur > rdev->stats.stag.max) @@ -678,9 +682,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { __be64 *pages; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct c4iw_dev *rhp; struct c4iw_pd *php; struct c4iw_mr *mhp; @@ -710,10 +714,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = ffs(mhp->umem->page_size) - 1; - n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - n += chunk->nents; - + n = mhp->umem->nmap; err = alloc_pbl(mhp, n); if (err) goto err; @@ -726,24 +727,22 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = cpu_to_be64(sg_dma_address( - &chunk->page_list[j]) + - mhp->umem->page_size * k); - if (i == PAGE_SIZE / sizeof *pages) { - err = write_pbl(&mhp->rhp->rdev, - pages, - mhp->attr.pbl_addr + (n << 3), i); - if (err) - goto pbl_done; - n += i; - i = 0; - } + for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = cpu_to_be64(sg_dma_address(sg) + + mhp->umem->page_size * k); + if (i == PAGE_SIZE / sizeof *pages) { + err = write_pbl(&mhp->rhp->rdev, + pages, + mhp->attr.pbl_addr + (n << 3), i); + if (err) + goto pbl_done; + n += i; + i = 0; } } + } if (i) err = write_pbl(&mhp->rhp->rdev, pages, @@ -903,7 +902,11 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device, dma_unmap_addr_set(c4pl, mapping, dma_addr); c4pl->dma_addr = dma_addr; c4pl->dev = dev; - c4pl->ibpl.max_page_list_len = pll_len; + c4pl->pll_len = pll_len; + + PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", + __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, + &c4pl->dma_addr); return &c4pl->ibpl; } @@ -912,8 +915,12 @@ void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl) { struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl); + PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", + __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, + &c4pl->dma_addr); + dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, - c4pl->ibpl.max_page_list_len, + c4pl->pll_len, c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping)); kfree(c4pl); } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 7e94c9a656a1..a94a3e12c349 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -106,15 +106,56 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, { struct c4iw_ucontext *context; struct c4iw_dev *rhp = to_c4iw_dev(ibdev); + static int warned; + struct c4iw_alloc_ucontext_resp uresp; + int ret = 0; + struct c4iw_mm_entry *mm = NULL; PDBG("%s ibdev %p\n", __func__, ibdev); context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); + if (!context) { + ret = -ENOMEM; + goto err; + } + c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx); INIT_LIST_HEAD(&context->mmaps); spin_lock_init(&context->mmap_lock); + + if (udata->outlen < sizeof(uresp)) { + if (!warned++) + pr_err(MOD "Warning - downlevel libcxgb4 (non-fatal), device status page disabled."); + rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED; + } else { + mm = kmalloc(sizeof(*mm), GFP_KERNEL); + if (!mm) { + ret = -ENOMEM; + goto err_free; + } + + uresp.status_page_size = PAGE_SIZE; + + spin_lock(&context->mmap_lock); + uresp.status_page_key = context->key; + context->key += PAGE_SIZE; + spin_unlock(&context->mmap_lock); + + ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (ret) + goto err_mm; + + mm->key = uresp.status_page_key; + mm->addr = virt_to_phys(rhp->rdev.status_page); + mm->len = PAGE_SIZE; + insert_mmap(context, mm); + } return &context->ibucontext; +err_mm: + kfree(mm); +err_free: + kfree(context); +err: + return ERR_PTR(ret); } static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) @@ -287,7 +328,7 @@ static int c4iw_query_device(struct ib_device *ibdev, props->max_mr = c4iw_num_stags(&dev->rdev); props->max_pd = T4_MAX_NUM_PD; props->local_ca_ack_delay = 0; - props->max_fast_reg_page_list_len = T4_MAX_FR_DEPTH; + props->max_fast_reg_page_list_len = t4_max_fr_depth(use_dsgl); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 582936708e6e..086f62f5dc9e 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -212,13 +212,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, wq->db = rdev->lldi.db_reg; wq->gts = rdev->lldi.gts_reg; - if (user) { - wq->sq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) + - (wq->sq.qid << rdev->qpshift); - wq->sq.udb &= PAGE_MASK; - wq->rq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) + - (wq->rq.qid << rdev->qpshift); - wq->rq.udb &= PAGE_MASK; + if (user || is_t5(rdev->lldi.adapter_type)) { + u32 off; + + off = (wq->sq.qid << rdev->qpshift) & PAGE_MASK; + if (user) { + wq->sq.udb = (u64 __iomem *)(rdev->bar2_pa + off); + } else { + off += 128 * (wq->sq.qid & rdev->qpmask) + 8; + wq->sq.udb = (u64 __iomem *)(rdev->bar2_kva + off); + } + off = (wq->rq.qid << rdev->qpshift) & PAGE_MASK; + if (user) { + wq->rq.udb = (u64 __iomem *)(rdev->bar2_pa + off); + } else { + off += 128 * (wq->rq.qid & rdev->qpmask) + 8; + wq->rq.udb = (u64 __iomem *)(rdev->bar2_kva + off); + } } wq->rdev = rdev; wq->rq.msn = 1; @@ -299,9 +309,10 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, if (ret) goto free_dma; - PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n", + PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%lx rqudb 0x%lx\n", __func__, wq->sq.qid, wq->rq.qid, wq->db, - (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb); + (__force unsigned long) wq->sq.udb, + (__force unsigned long) wq->rq.udb); return 0; free_dma: @@ -425,6 +436,8 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, default: return -EINVAL; } + wqe->send.r3 = 0; + wqe->send.r4 = 0; plen = 0; if (wr->num_sge) { @@ -555,7 +568,8 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32); int rem; - if (wr->wr.fast_reg.page_list_len > T4_MAX_FR_DEPTH) + if (wr->wr.fast_reg.page_list_len > + t4_max_fr_depth(use_dsgl)) return -EINVAL; wqe->fr.qpbinde_to_dcacpu = 0; @@ -638,6 +652,48 @@ void c4iw_qp_rem_ref(struct ib_qp *qp) wake_up(&(to_c4iw_qp(qp)->wait)); } +static void add_to_fc_list(struct list_head *head, struct list_head *entry) +{ + if (list_empty(entry)) + list_add_tail(entry, head); +} + +static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc) +{ + unsigned long flags; + + spin_lock_irqsave(&qhp->rhp->lock, flags); + spin_lock(&qhp->lock); + if (qhp->rhp->db_state == NORMAL) + t4_ring_sq_db(&qhp->wq, inc, + is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL); + else { + add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); + qhp->wq.sq.wq_pidx_inc += inc; + } + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&qhp->rhp->lock, flags); + return 0; +} + +static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) +{ + unsigned long flags; + + spin_lock_irqsave(&qhp->rhp->lock, flags); + spin_lock(&qhp->lock); + if (qhp->rhp->db_state == NORMAL) + t4_ring_rq_db(&qhp->wq, inc, + is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL); + else { + add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); + qhp->wq.rq.wq_pidx_inc += inc; + } + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&qhp->rhp->lock, flags); + return 0; +} + int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -646,7 +702,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, enum fw_wr_opcodes fw_opcode = 0; enum fw_ri_wr_flags fw_flags; struct c4iw_qp *qhp; - union t4_wr *wqe; + union t4_wr *wqe = NULL; u32 num_wrs; struct t4_swsqe *swsqe; unsigned long flag; @@ -675,7 +731,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_flags = 0; if (wr->send_flags & IB_SEND_SOLICITED) fw_flags |= FW_RI_SOLICITED_EVENT_FLAG; - if (wr->send_flags & IB_SEND_SIGNALED) + if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all) fw_flags |= FW_RI_COMPLETION_FLAG; swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; switch (wr->opcode) { @@ -736,7 +792,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } swsqe->idx = qhp->wq.sq.pidx; swsqe->complete = 0; - swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED); + swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; swsqe->flushed = 0; swsqe->wr_id = wr->wr_id; @@ -750,9 +807,14 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, t4_sq_produce(&qhp->wq, len16); idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); } - if (t4_wq_db_enabled(&qhp->wq)) - t4_ring_sq_db(&qhp->wq, idx); - spin_unlock_irqrestore(&qhp->lock, flag); + if (!qhp->rhp->rdev.status_page->db_off) { + t4_ring_sq_db(&qhp->wq, idx, + is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe); + spin_unlock_irqrestore(&qhp->lock, flag); + } else { + spin_unlock_irqrestore(&qhp->lock, flag); + ring_kernel_sq_db(qhp, idx); + } return err; } @@ -761,7 +823,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, { int err = 0; struct c4iw_qp *qhp; - union t4_recv_wr *wqe; + union t4_recv_wr *wqe = NULL; u32 num_wrs; u8 len16 = 0; unsigned long flag; @@ -812,9 +874,14 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, wr = wr->next; num_wrs--; } - if (t4_wq_db_enabled(&qhp->wq)) - t4_ring_rq_db(&qhp->wq, idx); - spin_unlock_irqrestore(&qhp->lock, flag); + if (!qhp->rhp->rdev.status_page->db_off) { + t4_ring_rq_db(&qhp->wq, idx, + is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe); + spin_unlock_irqrestore(&qhp->lock, flag); + } else { + spin_unlock_irqrestore(&qhp->lock, flag); + ring_kernel_rq_db(qhp, idx); + } return err; } @@ -1200,35 +1267,6 @@ out: return ret; } -/* - * Called by the library when the qp has user dbs disabled due to - * a DB_FULL condition. This function will single-thread all user - * DB rings to avoid overflowing the hw db-fifo. - */ -static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc) -{ - int delay = db_delay_usecs; - - mutex_lock(&qhp->rhp->db_mutex); - do { - - /* - * The interrupt threshold is dbfifo_int_thresh << 6. So - * make sure we don't cross that and generate an interrupt. - */ - if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) < - (qhp->rhp->rdev.lldi.dbfifo_int_thresh << 5)) { - writel(QID(qid) | PIDX(inc), qhp->wq.db); - break; - } - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(usecs_to_jiffies(delay)); - delay = min(delay << 1, 2000); - } while (1); - mutex_unlock(&qhp->rhp->db_mutex); - return 0; -} - int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, enum c4iw_qp_attr_mask mask, struct c4iw_qp_attributes *attrs, @@ -1278,11 +1316,11 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, } if (mask & C4IW_QP_ATTR_SQ_DB) { - ret = ring_kernel_db(qhp, qhp->wq.sq.qid, attrs->sq_db_inc); + ret = ring_kernel_sq_db(qhp, attrs->sq_db_inc); goto out; } if (mask & C4IW_QP_ATTR_RQ_DB) { - ret = ring_kernel_db(qhp, qhp->wq.rq.qid, attrs->rq_db_inc); + ret = ring_kernel_rq_db(qhp, attrs->rq_db_inc); goto out; } @@ -1332,6 +1370,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, switch (attrs->next_state) { case C4IW_QP_STATE_CLOSING: BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2); + t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_CLOSING); ep = qhp->ep; if (!internal) { @@ -1339,30 +1378,30 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, disconnect = 1; c4iw_get_ep(&qhp->ep->com); } - t4_set_wq_in_error(&qhp->wq); ret = rdma_fini(rhp, qhp, ep); if (ret) goto err; break; case C4IW_QP_STATE_TERMINATE: + t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_TERMINATE); qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; - t4_set_wq_in_error(&qhp->wq); ep = qhp->ep; - disconnect = 1; - if (!internal) + if (!internal) { + c4iw_get_ep(&qhp->ep->com); terminate = 1; - else { + disconnect = 1; + } else { + terminate = qhp->attr.send_term; ret = rdma_fini(rhp, qhp, ep); if (ret) goto err; } - c4iw_get_ep(&qhp->ep->com); break; case C4IW_QP_STATE_ERROR: - set_state(qhp, C4IW_QP_STATE_ERROR); t4_set_wq_in_error(&qhp->wq); + set_state(qhp, C4IW_QP_STATE_ERROR); if (!internal) { abort = 1; disconnect = 1; @@ -1465,14 +1504,6 @@ out: return ret; } -static int enable_qp_db(int id, void *p, void *data) -{ - struct c4iw_qp *qp = p; - - t4_enable_wq_db(&qp->wq); - return 0; -} - int c4iw_destroy_qp(struct ib_qp *ib_qp) { struct c4iw_dev *rhp; @@ -1490,22 +1521,15 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); wait_event(qhp->wait, !qhp->ep); - spin_lock_irq(&rhp->lock); - remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid); - rhp->qpcnt--; - BUG_ON(rhp->qpcnt < 0); - if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) { - rhp->rdev.stats.db_state_transitions++; - rhp->db_state = NORMAL; - idr_for_each(&rhp->qpidr, enable_qp_db, NULL); - } - if (db_coalescing_threshold >= 0) - if (rhp->qpcnt <= db_coalescing_threshold) - cxgb4_enable_db_coalescing(rhp->rdev.lldi.ports[0]); - spin_unlock_irq(&rhp->lock); + remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); atomic_dec(&qhp->refcnt); wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); + spin_lock_irq(&rhp->lock); + if (!list_empty(&qhp->db_fc_entry)) + list_del_init(&qhp->db_fc_entry); + spin_unlock_irq(&rhp->lock); + ucontext = ib_qp->uobject ? to_c4iw_ucontext(ib_qp->uobject->context) : NULL; destroy_qp(&rhp->rdev, &qhp->wq, @@ -1516,14 +1540,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) return 0; } -static int disable_qp_db(int id, void *p, void *data) -{ - struct c4iw_qp *qp = p; - - t4_disable_wq_db(&qp->wq); - return 0; -} - struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { @@ -1533,7 +1549,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct c4iw_cq *schp; struct c4iw_cq *rchp; struct c4iw_create_qp_resp uresp; - int sqsize, rqsize; + unsigned int sqsize, rqsize; struct c4iw_ucontext *ucontext; int ret; struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL; @@ -1605,25 +1621,13 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->attr.enable_bind = 1; qhp->attr.max_ord = 1; qhp->attr.max_ird = 1; + qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; spin_lock_init(&qhp->lock); mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); atomic_set(&qhp->refcnt, 1); - spin_lock_irq(&rhp->lock); - if (rhp->db_state != NORMAL) - t4_disable_wq_db(&qhp->wq); - rhp->qpcnt++; - if (rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) { - rhp->rdev.stats.db_state_transitions++; - rhp->db_state = FLOW_CONTROL; - idr_for_each(&rhp->qpidr, disable_qp_db, NULL); - } - if (db_coalescing_threshold >= 0) - if (rhp->qpcnt > db_coalescing_threshold) - cxgb4_disable_db_coalescing(rhp->rdev.lldi.ports[0]); - ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); - spin_unlock_irq(&rhp->lock); + ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); if (ret) goto err2; @@ -1692,11 +1696,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); insert_mmap(ucontext, mm2); mm3->key = uresp.sq_db_gts_key; - mm3->addr = qhp->wq.sq.udb; + mm3->addr = (__force unsigned long) qhp->wq.sq.udb; mm3->len = PAGE_SIZE; insert_mmap(ucontext, mm3); mm4->key = uresp.rq_db_gts_key; - mm4->addr = qhp->wq.rq.udb; + mm4->addr = (__force unsigned long) qhp->wq.rq.udb; mm4->len = PAGE_SIZE; insert_mmap(ucontext, mm4); if (mm5) { @@ -1709,6 +1713,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, } qhp->ibqp.qp_num = qhp->wq.sq.qid; init_timer(&(qhp->timer)); + INIT_LIST_HEAD(&qhp->db_fc_entry); PDBG("%s qhp %p sq_num_entries %d, rq_num_entries %d qpid 0x%0x\n", __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, qhp->wq.sq.qid); @@ -1772,11 +1777,15 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, /* * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for * ringing the queue db when we're in DB_FULL mode. + * Only allow this on T4 devices. */ attrs.sq_db_inc = attr->sq_psn; attrs.rq_db_inc = attr->rq_psn; mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0; mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0; + if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) && + (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB))) + return -EINVAL; return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0); } diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index cdef4d7fb6d8..67df71a7012e 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -179,8 +179,12 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) kfree(entry); } else { qid = c4iw_get_resource(&rdev->resource.qid_table); - if (!qid) + if (!qid) { + mutex_lock(&rdev->stats.lock); + rdev->stats.qid.fail++; + mutex_unlock(&rdev->stats.lock); goto out; + } mutex_lock(&rdev->stats.lock); rdev->stats.qid.cur += rdev->qpmask + 1; mutex_unlock(&rdev->stats.lock); @@ -322,8 +326,8 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size) unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6); PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6); if (!addr) - printk_ratelimited(KERN_WARNING MOD "%s: Out of RQT memory\n", - pci_name(rdev->lldi.pdev)); + pr_warn_ratelimited(MOD "%s: Out of RQT memory\n", + pci_name(rdev->lldi.pdev)); mutex_lock(&rdev->stats.lock); if (addr) { rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT); diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index e73ace739183..2178f3198410 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -84,7 +84,14 @@ struct t4_status_page { sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) #define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \ sizeof(struct fw_ri_immd)) & ~31UL) -#define T4_MAX_FR_DEPTH (1024 / sizeof(u64)) +#define T4_MAX_FR_IMMD_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) +#define T4_MAX_FR_DSGL 1024 +#define T4_MAX_FR_DSGL_DEPTH (T4_MAX_FR_DSGL / sizeof(u64)) + +static inline int t4_max_fr_depth(int use_dsgl) +{ + return use_dsgl ? T4_MAX_FR_DSGL_DEPTH : T4_MAX_FR_IMMD_DEPTH; +} #define T4_RQ_NUM_SLOTS 2 #define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) @@ -292,7 +299,7 @@ struct t4_sq { unsigned long phys_addr; struct t4_swsqe *sw_sq; struct t4_swsqe *oldest_read; - u64 udb; + u64 __iomem *udb; size_t memsize; u32 qid; u16 in_use; @@ -300,6 +307,7 @@ struct t4_sq { u16 cidx; u16 pidx; u16 wq_pidx; + u16 wq_pidx_inc; u16 flags; short flush_cidx; }; @@ -313,7 +321,7 @@ struct t4_rq { dma_addr_t dma_addr; DEFINE_DMA_UNMAP_ADDR(mapping); struct t4_swrqe *sw_rq; - u64 udb; + u64 __iomem *udb; size_t memsize; u32 qid; u32 msn; @@ -324,6 +332,7 @@ struct t4_rq { u16 cidx; u16 pidx; u16 wq_pidx; + u16 wq_pidx_inc; }; struct t4_wq { @@ -433,15 +442,67 @@ static inline u16 t4_sq_wq_size(struct t4_wq *wq) return wq->sq.size * T4_SQ_NUM_SLOTS; } -static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc) +/* This function copies 64 byte coalesced work request to memory + * mapped BAR2 space. For coalesced WRs, the SGE fetches data + * from the FIFO instead of from Host. + */ +static inline void pio_copy(u64 __iomem *dst, u64 *src) +{ + int count = 8; + + while (count) { + writeq(*src, dst); + src++; + dst++; + count--; + } +} + +static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5, + union t4_wr *wqe) { + + /* Flush host queue memory writes. */ wmb(); + if (t5) { + if (inc == 1 && wqe) { + PDBG("%s: WC wq->sq.pidx = %d\n", + __func__, wq->sq.pidx); + pio_copy(wq->sq.udb + 7, (void *)wqe); + } else { + PDBG("%s: DB wq->sq.pidx = %d\n", + __func__, wq->sq.pidx); + writel(PIDX_T5(inc), wq->sq.udb); + } + + /* Flush user doorbell area writes. */ + wmb(); + return; + } writel(QID(wq->sq.qid) | PIDX(inc), wq->db); } -static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc) +static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5, + union t4_recv_wr *wqe) { + + /* Flush host queue memory writes. */ wmb(); + if (t5) { + if (inc == 1 && wqe) { + PDBG("%s: WC wq->rq.pidx = %d\n", + __func__, wq->rq.pidx); + pio_copy(wq->rq.udb + 7, (void *)wqe); + } else { + PDBG("%s: DB wq->rq.pidx = %d\n", + __func__, wq->rq.pidx); + writel(PIDX_T5(inc), wq->rq.udb); + } + + /* Flush user doorbell area writes. */ + wmb(); + return; + } writel(QID(wq->rq.qid) | PIDX(inc), wq->db); } @@ -566,6 +627,9 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid); BUG_ON(1); } else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) { + + /* Ensure CQE is flushed to memory */ + rmb(); *cqe = &cq->queue[cq->cidx]; ret = 0; } else @@ -609,3 +673,7 @@ static inline void t4_set_cq_in_error(struct t4_cq *cq) ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1; } #endif + +struct t4_dev_status_page { + u8 db_off; +}; diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index dc193c292671..6121ca08fe58 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -836,4 +836,18 @@ struct ulptx_idata { #define V_RX_DACK_CHANGE(x) ((x) << S_RX_DACK_CHANGE) #define F_RX_DACK_CHANGE V_RX_DACK_CHANGE(1U) +enum { /* TCP congestion control algorithms */ + CONG_ALG_RENO, + CONG_ALG_TAHOE, + CONG_ALG_NEWRENO, + CONG_ALG_HIGHSPEED +}; + +#define S_CONG_CNTRL 14 +#define M_CONG_CNTRL 0x3 +#define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL) +#define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL) + +#define T5_OPT_2_VALID (1 << 31) + #endif /* _T4FW_RI_API_H_ */ diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h index 32b754c35ab7..11ccd276e5d9 100644 --- a/drivers/infiniband/hw/cxgb4/user.h +++ b/drivers/infiniband/hw/cxgb4/user.h @@ -70,4 +70,9 @@ struct c4iw_create_qp_resp { __u32 qid_mask; __u32 flags; }; + +struct c4iw_alloc_ucontext_resp { + __u64 status_page_key; + __u32 status_page_size; +}; #endif |