From 3ba23ade464cca7c4a7ba5628c613339d3f2e161 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Fri, 17 Jul 2009 13:13:22 +0000 Subject: RDS: Set retry_count to 2 and make modifiable via modparam This will be default cause IB connections to failover faster, but allow a longer retry count to be used if desired. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/rds/ib.h') diff --git a/net/rds/ib.h b/net/rds/ib.h index 455ae73047fe..420afb95ca1a 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -15,6 +15,8 @@ #define RDS_IB_DEFAULT_RECV_WR 1024 #define RDS_IB_DEFAULT_SEND_WR 256 +#define RDS_IB_DEFAULT_RETRY_COUNT 2 + #define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */ extern struct list_head rds_ib_devices; @@ -247,6 +249,7 @@ extern struct ib_client rds_ib_client; extern unsigned int fmr_pool_size; extern unsigned int fmr_message_size; +extern unsigned int rds_ib_retry_count; extern spinlock_t ib_nodev_conns_lock; extern struct list_head ib_nodev_conns; -- cgit v1.2.3 From 02a6a2592e41d27644d647f3bce23598649961bc Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Fri, 17 Jul 2009 13:13:24 +0000 Subject: RDS/IB: Handle connections using RDS 3.0 wire protocol The big differences between RDS 3.0 and 3.1 are protocol-level flow control, and with 3.1 the header is in front of the data. The header always ends up in the header buffer, and the data goes in the data page. In 3.0 our "header" is a trailer, and will end up either in the data page, the header buffer, or split across the two. Since 3.1 is backwards- compatible with 3.0, we need to continue to support these cases. This patch does that -- if using RDS 3.0 wire protocol, it will copy the header from wherever it ended up into the header buffer. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib.h | 12 ++++++++++-- net/rds/ib_cm.c | 9 ++++++--- net/rds/ib_recv.c | 43 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 6 deletions(-) (limited to 'net/rds/ib.h') diff --git a/net/rds/ib.h b/net/rds/ib.h index 420afb95ca1a..c0de7af6cf60 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -358,17 +358,25 @@ extern ctl_table rds_ib_sysctl_table[]; /* * Helper functions for getting/setting the header and data SGEs in * RDS packets (not RDMA) + * + * From version 3.1 onwards, header is in front of data in the sge. */ static inline struct ib_sge * rds_ib_header_sge(struct rds_ib_connection *ic, struct ib_sge *sge) { - return &sge[0]; + if (ic->conn->c_version > RDS_PROTOCOL_3_0) + return &sge[0]; + else + return &sge[1]; } static inline struct ib_sge * rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge) { - return &sge[1]; + if (ic->conn->c_version > RDS_PROTOCOL_3_0) + return &sge[1]; + else + return &sge[0]; } #endif diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 0964ac533ec8..1eb0c291a0b4 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -101,10 +101,13 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even if (event->param.conn.private_data_len >= sizeof(*dp)) { dp = event->param.conn.private_data; - rds_ib_set_protocol(conn, + /* make sure it isn't empty data */ + if (dp->dp_protocol_major) { + rds_ib_set_protocol(conn, RDS_PROTOCOL(dp->dp_protocol_major, - dp->dp_protocol_minor)); - rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit)); + dp->dp_protocol_minor)); + rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit)); + } } printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n", diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 5709bad28329..28bdcdcdbfe9 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -555,6 +555,47 @@ u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic) return rds_ib_get_ack(ic); } +static struct rds_header *rds_ib_get_header(struct rds_connection *conn, + struct rds_ib_recv_work *recv, + u32 data_len) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + void *hdr_buff = &ic->i_recv_hdrs[recv - ic->i_recvs]; + void *addr; + u32 misplaced_hdr_bytes; + + /* + * Support header at the front (RDS 3.1+) as well as header-at-end. + * + * Cases: + * 1) header all in header buff (great!) + * 2) header all in data page (copy all to header buff) + * 3) header split across hdr buf + data page + * (move bit in hdr buff to end before copying other bit from data page) + */ + if (conn->c_version > RDS_PROTOCOL_3_0 || data_len == RDS_FRAG_SIZE) + return hdr_buff; + + if (data_len <= (RDS_FRAG_SIZE - sizeof(struct rds_header))) { + addr = kmap_atomic(recv->r_frag->f_page, KM_SOFTIRQ0); + memcpy(hdr_buff, + addr + recv->r_frag->f_offset + data_len, + sizeof(struct rds_header)); + kunmap_atomic(addr, KM_SOFTIRQ0); + return hdr_buff; + } + + misplaced_hdr_bytes = (sizeof(struct rds_header) - (RDS_FRAG_SIZE - data_len)); + + memmove(hdr_buff + misplaced_hdr_bytes, hdr_buff, misplaced_hdr_bytes); + + addr = kmap_atomic(recv->r_frag->f_page, KM_SOFTIRQ0); + memcpy(hdr_buff, addr + recv->r_frag->f_offset + data_len, + sizeof(struct rds_header) - misplaced_hdr_bytes); + kunmap_atomic(addr, KM_SOFTIRQ0); + return hdr_buff; +} + /* * It's kind of lame that we're copying from the posted receive pages into * long-lived bitmaps. We could have posted the bitmaps and rdma written into @@ -667,7 +708,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, } byte_len -= sizeof(struct rds_header); - ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs]; + ihdr = rds_ib_get_header(conn, recv, byte_len); /* Validate the checksum. */ if (!rds_message_verify_checksum(ihdr)) { -- cgit v1.2.3 From a870d62726721785c34fa73d852bd35e5d1b295b Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Fri, 17 Jul 2009 13:13:33 +0000 Subject: RDS/IB: Always use PAGE_SIZE for FMR page size While FMRs allow significant flexibility in what size of pages they can use, we really just want FMR pages to match CPU page size. Roland says we can count on this always being supported, so this simplifies things. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib.c | 3 --- net/rds/ib.h | 3 --- net/rds/ib_rdma.c | 12 ++++++------ 3 files changed, 6 insertions(+), 12 deletions(-) (limited to 'net/rds/ib.h') diff --git a/net/rds/ib.c b/net/rds/ib.c index 27abdd3df2cc..868559ac42d7 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -85,9 +85,6 @@ void rds_ib_add_one(struct ib_device *device) rds_ibdev->max_wrs = dev_attr->max_qp_wr; rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE); - rds_ibdev->fmr_page_shift = max(9, ffs(dev_attr->page_size_cap) - 1); - rds_ibdev->fmr_page_size = 1 << rds_ibdev->fmr_page_shift; - rds_ibdev->fmr_page_mask = ~((u64) rds_ibdev->fmr_page_size - 1); rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32; rds_ibdev->max_fmrs = dev_attr->max_fmr ? min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) : diff --git a/net/rds/ib.h b/net/rds/ib.h index c0de7af6cf60..1378b854cac0 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -159,9 +159,6 @@ struct rds_ib_device { struct ib_pd *pd; struct ib_mr *mr; struct rds_ib_mr_pool *mr_pool; - int fmr_page_shift; - int fmr_page_size; - u64 fmr_page_mask; unsigned int fmr_max_remaps; unsigned int max_fmrs; int max_sge; diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 81033af93020..ef3ab5b7283e 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -211,7 +211,7 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev) pool->fmr_attr.max_pages = fmr_message_size; pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps; - pool->fmr_attr.page_shift = rds_ibdev->fmr_page_shift; + pool->fmr_attr.page_shift = PAGE_SHIFT; pool->max_free_pinned = rds_ibdev->max_fmrs * fmr_message_size / 4; /* We never allow more than max_items MRs to be allocated. @@ -349,13 +349,13 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]); u64 dma_addr = ib_sg_dma_address(dev, &scat[i]); - if (dma_addr & ~rds_ibdev->fmr_page_mask) { + if (dma_addr & ~PAGE_MASK) { if (i > 0) return -EINVAL; else ++page_cnt; } - if ((dma_addr + dma_len) & ~rds_ibdev->fmr_page_mask) { + if ((dma_addr + dma_len) & ~PAGE_MASK) { if (i < sg_dma_len - 1) return -EINVAL; else @@ -365,7 +365,7 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm len += dma_len; } - page_cnt += len >> rds_ibdev->fmr_page_shift; + page_cnt += len >> PAGE_SHIFT; if (page_cnt > fmr_message_size) return -EINVAL; @@ -378,9 +378,9 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]); u64 dma_addr = ib_sg_dma_address(dev, &scat[i]); - for (j = 0; j < dma_len; j += rds_ibdev->fmr_page_size) + for (j = 0; j < dma_len; j += PAGE_SIZE) dma_pages[page_cnt++] = - (dma_addr & rds_ibdev->fmr_page_mask) + j; + (dma_addr & PAGE_MASK) + j; } ret = ib_map_phys_fmr(ibmr->fmr, -- cgit v1.2.3