diff options
Diffstat (limited to 'drivers/infiniband/hw/ehca')
26 files changed, 2320 insertions, 1520 deletions
diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig index 1a854598e0e6..59f807d8d58e 100644 --- a/drivers/infiniband/hw/ehca/Kconfig +++ b/drivers/infiniband/hw/ehca/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_EHCA tristate "eHCA support" - depends on IBMEBUS && INFINIBAND + depends on IBMEBUS ---help--- This driver supports the IBM pSeries eHCA InfiniBand adapter. diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 0d6e2c4bb245..97d108634c58 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -79,7 +79,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) av->av.ipd = (ah_mult > 0) ? ((ehca_mult - 1) / ah_mult) : 0; } else - av->av.ipd = ehca_static_rate; + av->av.ipd = ehca_static_rate; av->av.lnh = ah_attr->ah_flags; av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6); @@ -118,7 +118,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } memcpy(&av->av.grh.word_1, &gid, sizeof(gid)); } - av->av.pmtu = EHCA_MAX_MTU; + av->av.pmtu = shca->max_mtu; /* dgid comes in grh.word_3 */ memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid, @@ -137,6 +137,8 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) struct ehca_av *av; struct ehca_ud_av new_ehca_av; struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca, + ib_device); u32 cur_pid = current->tgid; if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && @@ -192,7 +194,7 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid)); } - new_ehca_av.pmtu = EHCA_MAX_MTU; + new_ehca_av.pmtu = shca->max_mtu; memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid, sizeof(ah_attr->grh.dgid)); @@ -257,7 +259,7 @@ int ehca_init_av_cache(void) av_cache = kmem_cache_create("ehca_cache_av", sizeof(struct ehca_av), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!av_cache) return -ENOMEM; return 0; diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 1d286d3cc2d5..3725aa8664d9 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -5,6 +5,7 @@ * * Authors: Heiko J Schick <schickhj@de.ibm.com> * Christoph Raisch <raisch@de.ibm.com> + * Joachim Fenkes <fenkes@de.ibm.com> * * Copyright (c) 2005 IBM Corporation * @@ -42,7 +43,6 @@ #ifndef __EHCA_CLASSES_H__ #define __EHCA_CLASSES_H__ - struct ehca_module; struct ehca_qp; struct ehca_cq; @@ -86,13 +86,24 @@ struct ehca_eq { struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE]; }; +struct ehca_sma_attr { + u16 lid, lmc, sm_sl, sm_lid; + u16 pkey_tbl_len, pkeys[16]; +}; + struct ehca_sport { struct ib_cq *ibcq_aqp1; struct ib_qp *ibqp_aqp1; enum ib_rate rate; enum ib_port_state port_state; + struct ehca_sma_attr saved_attr; }; +#define HCA_CAP_MR_PGSIZE_4K 1 +#define HCA_CAP_MR_PGSIZE_64K 2 +#define HCA_CAP_MR_PGSIZE_1M 4 +#define HCA_CAP_MR_PGSIZE_16M 8 + struct ehca_shca { struct ib_device ib_device; struct ibmebus_dev *ibmebus_dev; @@ -107,17 +118,36 @@ struct ehca_shca { struct ehca_pd *pd; struct h_galpas galpas; struct mutex modify_mutex; + u64 hca_cap; + /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ + u32 hca_cap_mr_pgsize; + int max_mtu; }; struct ehca_pd { struct ib_pd ib_pd; struct ipz_pd fw_pd; u32 ownpid; + /* small queue mgmt */ + struct mutex lock; + struct list_head free[2]; + struct list_head full[2]; +}; + +enum ehca_ext_qp_type { + EQPT_NORMAL = 0, + EQPT_LLQP = 1, + EQPT_SRQBASE = 2, + EQPT_SRQ = 3, }; struct ehca_qp { - struct ib_qp ib_qp; + union { + struct ib_qp ib_qp; + struct ib_srq ib_srq; + }; u32 qp_type; + enum ehca_ext_qp_type ext_type; struct ipz_queue ipz_squeue; struct ipz_queue ipz_rqueue; struct h_galpas galpas; @@ -140,6 +170,10 @@ struct ehca_qp { u32 mm_count_galpa; }; +#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) +#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ) +#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE) + /* must be power of 2 */ #define QP_HASHTAB_LEN 8 @@ -156,8 +190,8 @@ struct ehca_cq { spinlock_t cb_lock; struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; struct list_head entry; - u32 nr_callbacks; /* #events assigned to cpu by scaling code */ - u32 nr_events; /* #events seen */ + u32 nr_callbacks; /* #events assigned to cpu by scaling code */ + atomic_t nr_events; /* #events seen */ wait_queue_head_t wait_completion; spinlock_t task_lock; u32 ownpid; @@ -180,11 +214,12 @@ struct ehca_mr { spinlock_t mrlock; enum ehca_mr_flag flags; - u32 num_pages; /* number of MR pages */ - u32 num_4k; /* number of 4k "page" portions to form MR */ + u32 num_kpages; /* number of kernel pages */ + u32 num_hwpages; /* number of hw pages to form MR */ + u64 hwpage_size; /* hw page size used for this MR */ int acl; /* ACL (stored here for usage in reregister) */ u64 *start; /* virtual start address (stored here for */ - /* usage in reregister) */ + /* usage in reregister) */ u64 size; /* size (stored here for usage in reregister) */ u32 fmr_page_size; /* page size for FMR */ u32 fmr_max_pages; /* max pages for FMR */ @@ -193,9 +228,6 @@ struct ehca_mr { /* fw specific data */ struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */ struct h_galpas galpas; - /* data for userspace bridge */ - u32 nr_of_pages; - void *pagearray; }; struct ehca_mw { @@ -217,26 +249,30 @@ enum ehca_mr_pgi_type { struct ehca_mr_pginfo { enum ehca_mr_pgi_type type; - u64 num_pages; - u64 page_cnt; - u64 num_4k; /* number of 4k "page" portions */ - u64 page_4k_cnt; /* counter for 4k "page" portions */ - u64 next_4k; /* next 4k "page" portion in buffer/chunk/listelem */ - - /* type EHCA_MR_PGI_PHYS section */ - int num_phys_buf; - struct ib_phys_buf *phys_buf_array; - u64 next_buf; - - /* type EHCA_MR_PGI_USER section */ - struct ib_umem *region; - struct ib_umem_chunk *next_chunk; - u64 next_nmap; - - /* type EHCA_MR_PGI_FMR section */ - u64 *page_list; - u64 next_listelem; - /* next_4k also used within EHCA_MR_PGI_FMR */ + u64 num_kpages; + u64 kpage_cnt; + u64 hwpage_size; /* hw page size used for this MR */ + u64 num_hwpages; /* number of hw pages */ + u64 hwpage_cnt; /* counter for hw pages */ + u64 next_hwpage; /* next hw page in buffer/chunk/listelem */ + + union { + struct { /* type EHCA_MR_PGI_PHYS section */ + int num_phys_buf; + struct ib_phys_buf *phys_buf_array; + u64 next_buf; + } phy; + struct { /* type EHCA_MR_PGI_USER section */ + struct ib_umem *region; + struct ib_umem_chunk *next_chunk; + u64 next_nmap; + } usr; + struct { /* type EHCA_MR_PGI_FMR section */ + u64 fmr_pgsize; + u64 *page_list; + u64 next_listelem; + } fmr; + } u; }; /* output parameters for MR/FMR hipz calls */ @@ -274,10 +310,11 @@ int ehca_init_av_cache(void); void ehca_cleanup_av_cache(void); int ehca_init_mrmw_cache(void); void ehca_cleanup_mrmw_cache(void); +int ehca_init_small_qp_cache(void); +void ehca_cleanup_small_qp_cache(void); -extern spinlock_t ehca_qp_idr_lock; -extern spinlock_t ehca_cq_idr_lock; -extern spinlock_t hcall_lock; +extern rwlock_t ehca_qp_idr_lock; +extern rwlock_t ehca_cq_idr_lock; extern struct idr ehca_qp_idr; extern struct idr ehca_cq_idr; @@ -292,7 +329,7 @@ struct ipzu_queue_resp { u32 queue_length; /* queue length allocated in bytes */ u32 pagesize; u32 toggle_state; - u32 dummy; /* padding for 8 byte alignment */ + u32 offset; /* save offset within a page for small_qp */ }; struct ehca_create_cq_resp { @@ -305,6 +342,7 @@ struct ehca_create_qp_resp { u32 qp_num; u32 token; u32 qp_type; + u32 ext_type; u32 qkey; /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ u32 real_qp_num; @@ -320,28 +358,59 @@ struct ehca_alloc_cq_parms { struct ipz_eq_handle eq_handle; }; +enum ehca_service_type { + ST_RC = 0, + ST_UC = 1, + ST_RD = 2, + ST_UD = 3, +}; + +enum ehca_ll_comp_flags { + LLQP_SEND_COMP = 0x20, + LLQP_RECV_COMP = 0x40, + LLQP_COMP_MASK = 0x60, +}; + +struct ehca_alloc_queue_parms { + /* input parameters */ + int max_wr; + int max_sge; + int page_size; + int is_small; + + /* output parameters */ + u16 act_nr_wqes; + u8 act_nr_sges; + u32 queue_size; /* bytes for small queues, pages otherwise */ +}; + struct ehca_alloc_qp_parms { - int servicetype; + struct ehca_alloc_queue_parms squeue; + struct ehca_alloc_queue_parms rqueue; + + /* input parameters */ + enum ehca_service_type servicetype; + int qp_storage; int sigtype; - int daqp_ctrl; - int max_send_sge; - int max_recv_sge; + enum ehca_ext_qp_type ext_type; + enum ehca_ll_comp_flags ll_comp_flags; int ud_av_l_key_ctl; - u16 act_nr_send_wqes; - u16 act_nr_recv_wqes; - u8 act_nr_recv_sges; - u8 act_nr_send_sges; + u32 token; + struct ipz_eq_handle eq_handle; + struct ipz_pd pd; + struct ipz_cq_handle send_cq_handle, recv_cq_handle; - u32 nr_rq_pages; - u32 nr_sq_pages; + u32 srq_qpn, srq_token, srq_limit; - struct ipz_eq_handle ipz_eq_handle; - struct ipz_pd pd; + /* output parameters */ + u32 real_qp_num; + struct ipz_qp_handle qp_handle; + struct h_galpas galpas; }; int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num); -struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); +struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); #endif diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h index 5665f213b81a..1798e6466bd0 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h +++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h @@ -154,83 +154,83 @@ struct hcp_modify_qp_control_block { u32 reserved_70_127[58]; /* 70 */ }; -#define MQPCB_MASK_QKEY EHCA_BMASK_IBM(0,0) -#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM(2,2) -#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM(3,3) -#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM(4,4) -#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM(5,5) -#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM(6,6) -#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM(7,7) -#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM(8,8) -#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM(9,9) -#define MQPCB_QP_STATE EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11,11) -#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12,12) -#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13,13) -#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14,14) -#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15,15) -#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16,16) -#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17,17) -#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18,18) -#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19,19) -#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20,20) -#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21,21) -#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22,22) -#define MQPCB_DLID EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23,23) -#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29,31) -#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24,24) -#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25,31) -#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25,25) -#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26,26) -#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27,27) -#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28,28) -#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12,31) -#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30,30) -#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31,31) -#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28,31) -#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32,32) -#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31,31) -#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33,33) -#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31) -#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34,34) -#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27,31) -#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35,35) -#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36,36) -#define MQPCB_DLID_AL EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37,37) -#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31) -#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38,38) -#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25,31) -#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39,39) -#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40,40) -#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41,41) -#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42,42) -#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12,31) -#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44,44) -#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45,45) -#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46,46) -#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47,47) -#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31,31) -#define MQPCB_QP_NUMBER EHCA_BMASK_IBM(8,31) -#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48,48) -#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31,31) -#define MQPCB_MASK_CURR_SQR_LIMIT EHCA_BMASK_IBM(49,49) -#define MQPCB_CURR_SQR_LIMIT EHCA_BMASK_IBM(15,31) -#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50,50) -#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51,51) +#define MQPCB_MASK_QKEY EHCA_BMASK_IBM( 0, 0) +#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM( 2, 2) +#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM( 3, 3) +#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM( 4, 4) +#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM( 5, 5) +#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM( 6, 6) +#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7) +#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) +#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) +#define MQPCB_QP_STATE EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) +#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) +#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) +#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14, 14) +#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15, 15) +#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16, 16) +#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17, 17) +#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) +#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) +#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) +#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) +#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) +#define MQPCB_DLID EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) +#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29, 31) +#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) +#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25, 31) +#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) +#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) +#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) +#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) +#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12, 31) +#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) +#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) +#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28, 31) +#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32) +#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31, 31) +#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) +#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) +#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) +#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27, 31) +#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) +#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) +#define MQPCB_DLID_AL EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) +#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) +#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) +#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25, 31) +#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) +#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) +#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) +#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) +#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12, 31) +#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) +#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) +#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) +#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) +#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31, 31) +#define MQPCB_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48) +#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31, 31) +#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) +#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) +#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) #endif /* __EHCA_CLASSES_PSERIES_H__ */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 67f0670fe3b1..81aff36101ba 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -56,11 +56,11 @@ int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp) { unsigned int qp_num = qp->real_qp_num; unsigned int key = qp_num & (QP_HASHTAB_LEN-1); - unsigned long spl_flags; + unsigned long flags; - spin_lock_irqsave(&cq->spinlock, spl_flags); + spin_lock_irqsave(&cq->spinlock, flags); hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]); - spin_unlock_irqrestore(&cq->spinlock, spl_flags); + spin_unlock_irqrestore(&cq->spinlock, flags); ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x", cq->cq_number, qp_num); @@ -74,9 +74,9 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); struct hlist_node *iter; struct ehca_qp *qp; - unsigned long spl_flags; + unsigned long flags; - spin_lock_irqsave(&cq->spinlock, spl_flags); + spin_lock_irqsave(&cq->spinlock, flags); hlist_for_each(iter, &cq->qp_hashtab[key]) { qp = hlist_entry(iter, struct ehca_qp, list_entries); if (qp->real_qp_num == real_qp_num) { @@ -88,7 +88,7 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) break; } } - spin_unlock_irqrestore(&cq->spinlock, spl_flags); + spin_unlock_irqrestore(&cq->spinlock, flags); if (ret) ehca_err(cq->ib_cq.device, "qp not found cq_num=%x real_qp_num=%x", @@ -97,7 +97,7 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) return ret; } -struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) +struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) { struct ehca_qp *ret = NULL; unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); @@ -146,6 +146,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, spin_lock_init(&my_cq->spinlock); spin_lock_init(&my_cq->cb_lock); spin_lock_init(&my_cq->task_lock); + atomic_set(&my_cq->nr_events, 0); init_waitqueue_head(&my_cq->wait_completion); my_cq->ownpid = current->tgid; @@ -162,9 +163,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, goto create_cq_exit1; } - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + write_lock_irqsave(&ehca_cq_idr_lock, flags); ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); } while (ret == -EAGAIN); @@ -189,8 +190,8 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, goto create_cq_exit2; } - ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages, - EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0); + ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages, + EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0); if (!ipz_rc) { ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p", ipz_rc, device); @@ -284,7 +285,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, return cq; create_cq_exit4: - ipz_queue_dtor(&my_cq->ipz_queue); + ipz_queue_dtor(NULL, &my_cq->ipz_queue); create_cq_exit3: h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); @@ -293,9 +294,9 @@ create_cq_exit3: "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret); create_cq_exit2: - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + write_lock_irqsave(&ehca_cq_idr_lock, flags); idr_remove(&ehca_cq_idr, my_cq->token); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); create_cq_exit1: kmem_cache_free(cq_cache, my_cq); @@ -303,16 +304,6 @@ create_cq_exit1: return cq; } -static int get_cq_nr_events(struct ehca_cq *my_cq) -{ - int ret; - unsigned long flags; - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - ret = my_cq->nr_events; - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); - return ret; -} - int ehca_destroy_cq(struct ib_cq *cq) { u64 h_ret; @@ -339,17 +330,18 @@ int ehca_destroy_cq(struct ib_cq *cq) } } - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - while (my_cq->nr_events) { - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); - wait_event(my_cq->wait_completion, !get_cq_nr_events(my_cq)); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - /* recheck nr_events to assure no cqe has just arrived */ - } - + /* + * remove the CQ from the idr first to make sure + * no more interrupt tasklets will touch this CQ + */ + write_lock_irqsave(&ehca_cq_idr_lock, flags); idr_remove(&ehca_cq_idr, my_cq->token); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + /* now wait until all pending events have completed */ + wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events)); + /* nobody's using our CQ any longer -- we can destroy it */ h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); if (h_ret == H_R_STATE) { /* cq in err: read err data and destroy it forcibly */ @@ -367,7 +359,7 @@ int ehca_destroy_cq(struct ib_cq *cq) "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num); return ehca2ib_return_code(h_ret); } - ipz_queue_dtor(&my_cq->ipz_queue); + ipz_queue_dtor(NULL, &my_cq->ipz_queue); kmem_cache_free(cq_cache, my_cq); return 0; @@ -395,7 +387,7 @@ int ehca_init_cq_cache(void) cq_cache = kmem_cache_create("ehca_cache_cq", sizeof(struct ehca_cq), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!cq_cache) return -ENOMEM; return 0; diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index 4961eb88827c..1d41faa7a337 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -86,8 +86,8 @@ int ehca_create_eq(struct ehca_shca *shca, return -EINVAL; } - ret = ipz_queue_ctor(&eq->ipz_queue, nr_pages, - EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0); + ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages, + EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0); if (!ret) { ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq); goto create_eq_exit1; @@ -96,7 +96,8 @@ int ehca_create_eq(struct ehca_shca *shca, for (i = 0; i < nr_pages; i++) { u64 rpage; - if (!(vpage = ipz_qpageit_get_inc(&eq->ipz_queue))) { + vpage = ipz_qpageit_get_inc(&eq->ipz_queue); + if (!vpage) { ret = H_RESOURCE; goto create_eq_exit2; } @@ -144,7 +145,7 @@ int ehca_create_eq(struct ehca_shca *shca, return 0; create_eq_exit2: - ipz_queue_dtor(&eq->ipz_queue); + ipz_queue_dtor(NULL, &eq->ipz_queue); create_eq_exit1: hipz_h_destroy_eq(shca->ipz_hca_handle, eq); @@ -180,7 +181,7 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq) ehca_err(&shca->ib_device, "Can't free EQ resources."); return -EINVAL; } - ipz_queue_dtor(&eq->ipz_queue); + ipz_queue_dtor(NULL, &eq->ipz_queue); return 0; } diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 32b55a4f0e5b..fc19ef9fd963 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -45,11 +45,25 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { - int ret = 0; + int i, ret = 0; struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); struct hipz_query_hca *rblock; + static const u32 cap_mapping[] = { + IB_DEVICE_RESIZE_MAX_WR, HCA_CAP_WQE_RESIZE, + IB_DEVICE_BAD_PKEY_CNTR, HCA_CAP_BAD_P_KEY_CTR, + IB_DEVICE_BAD_QKEY_CNTR, HCA_CAP_Q_KEY_VIOL_CTR, + IB_DEVICE_RAW_MULTI, HCA_CAP_RAW_PACKET_MCAST, + IB_DEVICE_AUTO_PATH_MIG, HCA_CAP_AUTO_PATH_MIG, + IB_DEVICE_CHANGE_PHY_PORT, HCA_CAP_SQD_RTS_PORT_CHANGE, + IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK, + IB_DEVICE_CURR_QP_STATE_MOD, HCA_CAP_CUR_QP_STATE_MOD, + IB_DEVICE_SHUTDOWN_PORT, HCA_CAP_SHUTDOWN_PORT, + IB_DEVICE_INIT_TYPE, HCA_CAP_INIT_TYPE, + IB_DEVICE_PORT_ACTIVE_EVENT, HCA_CAP_PORT_ACTIVE_EVENT, + }; + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); @@ -96,6 +110,13 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) props->max_total_mcast_qp_attach = min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX); + /* translate device capabilities */ + props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ; + for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2) + if (rblock->hca_cap_indicators & cap_mapping[i + 1]) + props->device_cap_flags |= cap_mapping[i]; + query_device1: ehca_free_fw_ctrlblock(rblock); @@ -106,6 +127,7 @@ int ehca_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { int ret = 0; + u64 h_ret; struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); struct hipz_query_port *rblock; @@ -116,7 +138,8 @@ int ehca_query_port(struct ib_device *ibdev, return -ENOMEM; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto query_port1; @@ -172,12 +195,50 @@ query_port1: return ret; } +int ehca_query_sma_attr(struct ehca_shca *shca, + u8 port, struct ehca_sma_attr *attr) +{ + int ret = 0; + u64 h_ret; + struct hipz_query_port *rblock; + + rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_sma_attr1; + } + + memset(attr, 0, sizeof(struct ehca_sma_attr)); + + attr->lid = rblock->lid; + attr->lmc = rblock->lmc; + attr->sm_sl = rblock->sm_sl; + attr->sm_lid = rblock->sm_lid; + + attr->pkey_tbl_len = rblock->pkey_tbl_len; + memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys)); + +query_sma_attr1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { int ret = 0; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); + u64 h_ret; + struct ehca_shca *shca; struct hipz_query_port *rblock; + shca = container_of(ibdev, struct ehca_shca, ib_device); if (index > 16) { ehca_err(&shca->ib_device, "Invalid index: %x.", index); return -EINVAL; @@ -189,7 +250,8 @@ int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) return -ENOMEM; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto query_pkey1; @@ -207,6 +269,7 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { int ret = 0; + u64 h_ret; struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); struct hipz_query_port *rblock; @@ -222,7 +285,8 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port, return -ENOMEM; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto query_gid1; @@ -247,11 +311,12 @@ int ehca_modify_port(struct ib_device *ibdev, struct ib_port_modify *props) { int ret = 0; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); + struct ehca_shca *shca; struct hipz_query_port *rblock; u32 cap; u64 hret; + shca = container_of(ibdev, struct ehca_shca, ib_device); if ((props->set_port_cap_mask | props->clr_port_cap_mask) & ~allowed_port_caps) { ehca_err(&shca->ib_device, "Non-changeable bits set in masks " @@ -261,7 +326,7 @@ int ehca_modify_port(struct ib_device *ibdev, } if (mutex_lock_interruptible(&shca->modify_mutex)) - return -ERESTARTSYS; + return -ERESTARTSYS; rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { @@ -270,7 +335,8 @@ int ehca_modify_port(struct ib_device *ibdev, goto modify_port1; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (hret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto modify_port2; @@ -282,7 +348,8 @@ int ehca_modify_port(struct ib_device *ibdev, hret = hipz_h_modify_port(shca->ipz_hca_handle, port, cap, props->init_type, port_modify_mask); if (hret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Modify port failed hret=%lx", hret); + ehca_err(&shca->ib_device, "Modify port failed hret=%lx", + hret); ret = -EINVAL; } @@ -290,7 +357,7 @@ modify_port2: ehca_free_fw_ctrlblock(rblock); modify_port1: - mutex_unlock(&shca->modify_mutex); + mutex_unlock(&shca->modify_mutex); return ret; } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 100329ba3343..71c0799b3500 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -5,6 +5,8 @@ * * Authors: Heiko J Schick <schickhj@de.ibm.com> * Khadija Souissi <souissi@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Joachim Fenkes <fenkes@de.ibm.com> * * Copyright (c) 2005 IBM Corporation * @@ -47,25 +49,26 @@ #include "hipz_fns.h" #include "ipz_pt_fn.h" -#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) -#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM(2,7) -#define EQE_CQ_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_QP_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_QP_TOKEN EHCA_BMASK_IBM(32,63) -#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32,63) +#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7) +#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63) +#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63) -#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) -#define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7) -#define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15) -#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16) +#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7) +#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) +#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) +#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) -#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63) -#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7) +#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) +#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) static void queue_comp_task(struct ehca_cq *__cq); -static struct ehca_comp_pool* pool; +static struct ehca_comp_pool *pool; #ifdef CONFIG_HOTPLUG_CPU static struct notifier_block comp_pool_callback_nb; #endif @@ -82,8 +85,8 @@ static inline void comp_event_callback(struct ehca_cq *cq) return; } -static void print_error_data(struct ehca_shca * shca, void* data, - u64* rblock, int length) +static void print_error_data(struct ehca_shca *shca, void *data, + u64 *rblock, int length) { u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); u64 resource = rblock[1]; @@ -91,7 +94,7 @@ static void print_error_data(struct ehca_shca * shca, void* data, switch (type) { case 0x1: /* Queue Pair */ { - struct ehca_qp *qp = (struct ehca_qp*)data; + struct ehca_qp *qp = (struct ehca_qp *)data; /* only print error data if AER is set */ if (rblock[6] == 0) @@ -104,7 +107,7 @@ static void print_error_data(struct ehca_shca * shca, void* data, } case 0x4: /* Completion Queue */ { - struct ehca_cq *cq = (struct ehca_cq*)data; + struct ehca_cq *cq = (struct ehca_cq *)data; ehca_err(&shca->ib_device, "CQ 0x%x (resource=%lx) has errors.", @@ -172,33 +175,41 @@ error_data1: } -static void qp_event_callback(struct ehca_shca *shca, - u64 eqe, - enum ib_event_type event_type) +static void qp_event_callback(struct ehca_shca *shca, u64 eqe, + enum ib_event_type event_type, int fatal) { struct ib_event event; struct ehca_qp *qp; - unsigned long flags; u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); - spin_lock_irqsave(&ehca_qp_idr_lock, flags); + read_lock(&ehca_qp_idr_lock); qp = idr_find(&ehca_qp_idr, token); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + read_unlock(&ehca_qp_idr_lock); if (!qp) return; - ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); + if (fatal) + ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); - if (!qp->ib_qp.event_handler) - return; + event.device = &shca->ib_device; - event.device = &shca->ib_device; - event.event = event_type; - event.element.qp = &qp->ib_qp; + if (qp->ext_type == EQPT_SRQ) { + if (!qp->ib_srq.event_handler) + return; - qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); + event.event = fatal ? IB_EVENT_SRQ_ERR : event_type; + event.element.srq = &qp->ib_srq; + qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context); + } else { + if (!qp->ib_qp.event_handler) + return; + + event.event = event_type; + event.element.qp = &qp->ib_qp; + qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); + } return; } @@ -207,18 +218,22 @@ static void cq_event_callback(struct ehca_shca *shca, u64 eqe) { struct ehca_cq *cq; - unsigned long flags; u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + read_lock(&ehca_cq_idr_lock); cq = idr_find(&ehca_cq_idr, token); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + if (cq) + atomic_inc(&cq->nr_events); + read_unlock(&ehca_cq_idr_lock); if (!cq) return; ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + return; } @@ -228,17 +243,17 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe) switch (identifier) { case 0x02: /* path migrated */ - qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG); + qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0); break; case 0x03: /* communication established */ - qp_event_callback(shca, eqe, IB_EVENT_COMM_EST); + qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0); break; case 0x04: /* send queue drained */ - qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED); + qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0); break; case 0x05: /* QP error */ case 0x06: /* QP error */ - qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL); + qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1); break; case 0x07: /* CQ error */ case 0x08: /* CQ error */ @@ -272,6 +287,11 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe) ehca_err(&shca->ib_device, "Interface trace stopped."); break; case 0x14: /* first error capture info available */ + ehca_info(&shca->ib_device, "First error capture available"); + break; + case 0x15: /* SRQ limit reached */ + qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0); + break; default: ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.", identifier, shca->ib_device.name); @@ -281,30 +301,61 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe) return; } -static void parse_ec(struct ehca_shca *shca, u64 eqe) +static void dispatch_port_event(struct ehca_shca *shca, int port_num, + enum ib_event_type type, const char *msg) { struct ib_event event; + + ehca_info(&shca->ib_device, "port %d %s.", port_num, msg); + event.device = &shca->ib_device; + event.event = type; + event.element.port_num = port_num; + ib_dispatch_event(&event); +} + +static void notify_port_conf_change(struct ehca_shca *shca, int port_num) +{ + struct ehca_sma_attr new_attr; + struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr; + + ehca_query_sma_attr(shca, port_num, &new_attr); + + if (new_attr.sm_sl != old_attr->sm_sl || + new_attr.sm_lid != old_attr->sm_lid) + dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE, + "SM changed"); + + if (new_attr.lid != old_attr->lid || + new_attr.lmc != old_attr->lmc) + dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE, + "LID changed"); + + if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len || + memcmp(new_attr.pkeys, old_attr->pkeys, + sizeof(u16) * new_attr.pkey_tbl_len)) + dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE, + "P_Key changed"); + + *old_attr = new_attr; +} + +static void parse_ec(struct ehca_shca *shca, u64 eqe) +{ u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); switch (ec) { case 0x30: /* port availability change */ if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { - ehca_info(&shca->ib_device, - "port %x is active.", port); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ACTIVE; - event.element.port_num = port; shca->sport[port - 1].port_state = IB_PORT_ACTIVE; - ib_dispatch_event(&event); + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + ehca_query_sma_attr(shca, port, + &shca->sport[port - 1].saved_attr); } else { - ehca_info(&shca->ib_device, - "port %x is inactive.", port); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ERR; - event.element.port_num = port; shca->sport[port - 1].port_state = IB_PORT_DOWN; - ib_dispatch_event(&event); + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); } break; case 0x31: @@ -312,24 +363,19 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe) * disruptive change is caused by * LID, PKEY or SM change */ - ehca_warn(&shca->ib_device, - "disruptive port %x configuration change", port); - - ehca_info(&shca->ib_device, - "port %x is inactive.", port); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ERR; - event.element.port_num = port; - shca->sport[port - 1].port_state = IB_PORT_DOWN; - ib_dispatch_event(&event); - - ehca_info(&shca->ib_device, - "port %x is active.", port); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ACTIVE; - event.element.port_num = port; - shca->sport[port - 1].port_state = IB_PORT_ACTIVE; - ib_dispatch_event(&event); + if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) { + ehca_warn(&shca->ib_device, "disruptive port " + "%d configuration change", port); + + shca->sport[port - 1].port_state = IB_PORT_DOWN; + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); + + shca->sport[port - 1].port_state = IB_PORT_ACTIVE; + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + } else + notify_port_conf_change(shca, port); break; case 0x32: /* adapter malfunction */ ehca_err(&shca->ib_device, "Adapter malfunction."); @@ -404,7 +450,6 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) { u64 eqe_value; u32 token; - unsigned long flags; struct ehca_cq *cq; eqe_value = eqe->entry; @@ -412,27 +457,24 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { ehca_dbg(&shca->ib_device, "Got completion event"); token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + read_lock(&ehca_cq_idr_lock); cq = idr_find(&ehca_cq_idr, token); + if (cq) + atomic_inc(&cq->nr_events); + read_unlock(&ehca_cq_idr_lock); if (cq == NULL) { - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); ehca_err(&shca->ib_device, "Invalid eqe for non-existing cq token=%x", token); return; } reset_eq_pending(cq); - cq->nr_events++; - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); if (ehca_scaling_code) queue_comp_task(cq); else { comp_event_callback(cq); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - cq->nr_events--; - if (!cq->nr_events) + if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); } } else { ehca_dbg(&shca->ib_device, "Got non completion event"); @@ -476,17 +518,17 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) eqe_value = eqe_cache[eqe_cnt].eqe->entry; if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); - spin_lock(&ehca_cq_idr_lock); + read_lock(&ehca_cq_idr_lock); eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); + if (eqe_cache[eqe_cnt].cq) + atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events); + read_unlock(&ehca_cq_idr_lock); if (!eqe_cache[eqe_cnt].cq) { - spin_unlock(&ehca_cq_idr_lock); ehca_err(&shca->ib_device, "Invalid eqe for non-existing cq " "token=%x", token); continue; } - eqe_cache[eqe_cnt].cq->nr_events++; - spin_unlock(&ehca_cq_idr_lock); } else eqe_cache[eqe_cnt].cq = NULL; eqe_cnt++; @@ -517,11 +559,8 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) else { struct ehca_cq *cq = eq->eqe_cache[i].cq; comp_event_callback(cq); - spin_lock(&ehca_cq_idr_lock); - cq->nr_events--; - if (!cq->nr_events) + if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_unlock(&ehca_cq_idr_lock); } } else { ehca_dbg(&shca->ib_device, "Got non completion event"); @@ -547,7 +586,7 @@ void ehca_tasklet_eq(unsigned long data) ehca_process_eq((struct ehca_shca*)data, 1); } -static inline int find_next_online_cpu(struct ehca_comp_pool* pool) +static inline int find_next_online_cpu(struct ehca_comp_pool *pool) { int cpu; unsigned long flags; @@ -611,7 +650,7 @@ static void queue_comp_task(struct ehca_cq *__cq) __queue_comp_task(__cq, cct); } -static void run_comp_task(struct ehca_cpu_comp_task* cct) +static void run_comp_task(struct ehca_cpu_comp_task *cct) { struct ehca_cq *cq; unsigned long flags; @@ -621,13 +660,10 @@ static void run_comp_task(struct ehca_cpu_comp_task* cct) while (!list_empty(&cct->cq_list)) { cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); spin_unlock_irqrestore(&cct->task_lock, flags); - comp_event_callback(cq); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - cq->nr_events--; - if (!cq->nr_events) + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); spin_lock_irqsave(&cct->task_lock, flags); spin_lock(&cq->task_lock); @@ -644,12 +680,12 @@ static void run_comp_task(struct ehca_cpu_comp_task* cct) static int comp_task(void *__cct) { - struct ehca_cpu_comp_task* cct = __cct; + struct ehca_cpu_comp_task *cct = __cct; int cql_empty; DECLARE_WAITQUEUE(wait, current); set_current_state(TASK_INTERRUPTIBLE); - while(!kthread_should_stop()) { + while (!kthread_should_stop()) { add_wait_queue(&cct->wait_queue, &wait); spin_lock_irq(&cct->task_lock); @@ -723,7 +759,7 @@ static void take_over_work(struct ehca_comp_pool *pool, list_splice_init(&cct->cq_list, &list); - while(!list_empty(&list)) { + while (!list_empty(&list)) { cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); list_del(&cq->entry); @@ -746,7 +782,7 @@ static int comp_pool_callback(struct notifier_block *nfb, case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); - if(!create_comp_task(pool, cpu)) { + if (!create_comp_task(pool, cpu)) { ehca_gen_err("Can't create comp_task for cpu: %x", cpu); return NOTIFY_BAD; } @@ -816,7 +852,7 @@ int ehca_create_comp_pool(void) #ifdef CONFIG_HOTPLUG_CPU comp_pool_callback_nb.notifier_call = comp_pool_callback; - comp_pool_callback_nb.priority =0; + comp_pool_callback_nb.priority = 0; register_cpu_notifier(&comp_pool_callback_nb); #endif diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h index 6ed06ee033ed..3346cb06cea6 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.h +++ b/drivers/infiniband/hw/ehca/ehca_irq.h @@ -47,7 +47,6 @@ struct ehca_shca; #include <linux/interrupt.h> #include <linux/types.h> -#include <asm/atomic.h> int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource); diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 37e7fe0908cf..dce503bb7d6b 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -49,6 +49,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props); int ehca_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); +int ehca_query_sma_attr(struct ehca_shca *shca, u8 port, + struct ehca_sma_attr *attr); + int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey); int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, @@ -78,8 +81,9 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, int num_phys_buf, int mr_access_flags, u64 *iova_start); -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, - int mr_access_flags, struct ib_udata *udata); +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int mr_access_flags, + struct ib_udata *udata); int ehca_rereg_phys_mr(struct ib_mr *mr, int mr_rereg_mask, @@ -154,6 +158,21 @@ int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr); +int ehca_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + +struct ib_srq *ehca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); + +int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); + +int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); + +int ehca_destroy_srq(struct ib_srq *srq); + u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp, struct ib_qp_init_attr *qp_init_attr); @@ -174,7 +193,7 @@ void ehca_poll_eqs(unsigned long data); void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); #else -#define ehca_alloc_fw_ctrlblock(flags) ((void *) get_zeroed_page(flags)) +#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags)) #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) #endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index c3f99f33b49c..99036b65bb84 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -63,6 +63,7 @@ int ehca_port_act_time = 30; int ehca_poll_all_eqs = 1; int ehca_static_rate = -1; int ehca_scaling_code = 0; +int ehca_mr_largepage = 0; module_param_named(open_aqp1, ehca_open_aqp1, int, 0); module_param_named(debug_level, ehca_debug_level, int, 0); @@ -72,7 +73,8 @@ module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0); module_param_named(port_act_time, ehca_port_act_time, int, 0); module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0); module_param_named(static_rate, ehca_static_rate, int, 0); -module_param_named(scaling_code, ehca_scaling_code, int, 0); +module_param_named(scaling_code, ehca_scaling_code, int, 0); +module_param_named(mr_largepage, ehca_mr_largepage, int, 0); MODULE_PARM_DESC(open_aqp1, "AQP1 on startup (0: no (default), 1: yes)"); @@ -94,22 +96,23 @@ MODULE_PARM_DESC(poll_all_eqs, MODULE_PARM_DESC(static_rate, "set permanent static rate (default: disabled)"); MODULE_PARM_DESC(scaling_code, - "set scaling code (0: disabled, 1: enabled/default)"); + "set scaling code (0: disabled/default, 1: enabled)"); +MODULE_PARM_DESC(mr_largepage, + "use large page for MR (0: use PAGE_SIZE (default), " + "1: use large page depending on MR size"); -spinlock_t ehca_qp_idr_lock; -spinlock_t ehca_cq_idr_lock; -spinlock_t hcall_lock; +DEFINE_RWLOCK(ehca_qp_idr_lock); +DEFINE_RWLOCK(ehca_cq_idr_lock); DEFINE_IDR(ehca_qp_idr); DEFINE_IDR(ehca_cq_idr); - -static struct list_head shca_list; /* list of all registered ehcas */ -static spinlock_t shca_list_lock; +static LIST_HEAD(shca_list); /* list of all registered ehcas */ +static DEFINE_SPINLOCK(shca_list_lock); static struct timer_list poll_eqs_timer; #ifdef CONFIG_PPC_64K_PAGES -static struct kmem_cache *ctblk_cache = NULL; +static struct kmem_cache *ctblk_cache; void *ehca_alloc_fw_ctrlblock(gfp_t flags) { @@ -127,6 +130,23 @@ void ehca_free_fw_ctrlblock(void *ptr) } #endif +int ehca2ib_return_code(u64 ehca_rc) +{ + switch (ehca_rc) { + case H_SUCCESS: + return 0; + case H_RESOURCE: /* Resource in use */ + case H_BUSY: + return -EBUSY; + case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ + case H_CONSTRAINED: /* resource constraint */ + case H_NO_MEM: + return -ENOMEM; + default: + return -EINVAL; + } +} + static int ehca_create_slab_caches(void) { int ret; @@ -161,19 +181,28 @@ static int ehca_create_slab_caches(void) goto create_slab_caches5; } + ret = ehca_init_small_qp_cache(); + if (ret) { + ehca_gen_err("Cannot create small queue SLAB cache."); + goto create_slab_caches6; + } + #ifdef CONFIG_PPC_64K_PAGES ctblk_cache = kmem_cache_create("ehca_cache_ctblk", EHCA_PAGESIZE, H_CB_ALIGNMENT, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!ctblk_cache) { ehca_gen_err("Cannot create ctblk SLAB cache."); - ehca_cleanup_mrmw_cache(); - goto create_slab_caches5; + ehca_cleanup_small_qp_cache(); + goto create_slab_caches6; } #endif return 0; +create_slab_caches6: + ehca_cleanup_mrmw_cache(); + create_slab_caches5: ehca_cleanup_av_cache(); @@ -191,6 +220,7 @@ create_slab_caches2: static void ehca_destroy_slab_caches(void) { + ehca_cleanup_small_qp_cache(); ehca_cleanup_mrmw_cache(); ehca_cleanup_av_cache(); ehca_cleanup_qp_cache(); @@ -202,14 +232,38 @@ static void ehca_destroy_slab_caches(void) #endif } -#define EHCA_HCAAVER EHCA_BMASK_IBM(32,39) -#define EHCA_REVID EHCA_BMASK_IBM(40,63) +#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39) +#define EHCA_REVID EHCA_BMASK_IBM(40, 63) + +static struct cap_descr { + u64 mask; + char *descr; +} hca_cap_descr[] = { + { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" }, + { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" }, + { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" }, + { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" }, + { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" }, + { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" }, + { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" }, + { HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" }, + { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" }, + { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" }, + { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" }, + { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" }, + { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" }, + { HCA_CAP_SRQ, "HCA_CAP_SRQ" }, + { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" }, + { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" }, + { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" }, +}; int ehca_sense_attributes(struct ehca_shca *shca) { - int ret = 0; + int i, ret = 0; u64 h_ret; struct hipz_query_hca *rblock; + struct hipz_query_port *port; rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { @@ -222,7 +276,7 @@ int ehca_sense_attributes(struct ehca_shca *shca) ehca_gen_err("Cannot query device properties. h_ret=%lx", h_ret); ret = -EPERM; - goto num_ports1; + goto sense_attributes1; } if (ehca_nr_ports == 1) @@ -241,19 +295,52 @@ int ehca_sense_attributes(struct ehca_shca *shca) ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid); - if ((hcaaver == 1) && (revid == 0)) - shca->hw_level = 0; - else if ((hcaaver == 1) && (revid == 1)) - shca->hw_level = 1; - else if ((hcaaver == 1) && (revid == 2)) - shca->hw_level = 2; - } + if (hcaaver == 1) { + if (revid <= 3) + shca->hw_level = 0x10 | (revid + 1); + else + shca->hw_level = 0x14; + } else if (hcaaver == 2) { + if (revid == 0) + shca->hw_level = 0x21; + else if (revid == 0x10) + shca->hw_level = 0x22; + else if (revid == 0x20 || revid == 0x21) + shca->hw_level = 0x23; + } + + if (!shca->hw_level) { + ehca_gen_warn("unknown hardware version" + " - assuming default level"); + shca->hw_level = 0x22; + } + } else + shca->hw_level = ehca_hw_level; ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); shca->sport[0].rate = IB_RATE_30_GBPS; shca->sport[1].rate = IB_RATE_30_GBPS; -num_ports1: + shca->hca_cap = rblock->hca_cap_indicators; + ehca_gen_dbg(" ... HCA capabilities:"); + for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++) + if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) + ehca_gen_dbg(" %s", hca_cap_descr[i].descr); + + shca->hca_cap_mr_pgsize = rblock->memory_page_size_supported; + + port = (struct hipz_query_port *)rblock; + h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); + if (h_ret != H_SUCCESS) { + ehca_gen_err("Cannot query port properties. h_ret=%lx", + h_ret); + ret = -EPERM; + goto sense_attributes1; + } + + shca->max_mtu = port->max_mtu; + +sense_attributes1: ehca_free_fw_ctrlblock(rblock); return ret; } @@ -293,7 +380,7 @@ int ehca_init_device(struct ehca_shca *shca) strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); shca->ib_device.owner = THIS_MODULE; - shca->ib_device.uverbs_abi_ver = 6; + shca->ib_device.uverbs_abi_ver = 7; shca->ib_device.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -361,6 +448,20 @@ int ehca_init_device(struct ehca_shca *shca) /* shca->ib_device.process_mad = ehca_process_mad; */ shca->ib_device.mmap = ehca_mmap; + if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { + shca->ib_device.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + + shca->ib_device.create_srq = ehca_create_srq; + shca->ib_device.modify_srq = ehca_modify_srq; + shca->ib_device.query_srq = ehca_query_srq; + shca->ib_device.destroy_srq = ehca_destroy_srq; + shca->ib_device.post_srq_recv = ehca_post_srq_recv; + } + return ret; } @@ -377,7 +478,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) return -EPERM; } - ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10, 0); + ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), 10, 0); if (IS_ERR(ibcq)) { ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); return PTR_ERR(ibcq); @@ -523,6 +624,14 @@ static ssize_t ehca_show_adapter_handle(struct device *dev, } static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); +static ssize_t ehca_show_mr_largepage(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", ehca_mr_largepage); +} +static DEVICE_ATTR(mr_largepage, S_IRUGO, ehca_show_mr_largepage, NULL); + static struct attribute *ehca_dev_attrs[] = { &dev_attr_adapter_handle.attr, &dev_attr_num_ports.attr, @@ -539,6 +648,7 @@ static struct attribute *ehca_dev_attrs[] = { &dev_attr_cur_mw.attr, &dev_attr_max_pd.attr, &dev_attr_max_ah.attr, + &dev_attr_mr_largepage.attr, NULL }; @@ -604,7 +714,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, } /* create internal protection domain */ - ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL); + ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL); if (IS_ERR(ibpd)) { ehca_err(&shca->ib_device, "Cannot create internal PD."); ret = PTR_ERR(ibpd); @@ -800,27 +910,22 @@ int __init ehca_module_init(void) printk(KERN_INFO "eHCA Infiniband Device Driver " "(Rel.: SVNEHCA_0023)\n"); - idr_init(&ehca_qp_idr); - idr_init(&ehca_cq_idr); - spin_lock_init(&ehca_qp_idr_lock); - spin_lock_init(&ehca_cq_idr_lock); - spin_lock_init(&hcall_lock); - INIT_LIST_HEAD(&shca_list); - spin_lock_init(&shca_list_lock); - - if ((ret = ehca_create_comp_pool())) { + ret = ehca_create_comp_pool(); + if (ret) { ehca_gen_err("Cannot create comp pool."); return ret; } - if ((ret = ehca_create_slab_caches())) { + ret = ehca_create_slab_caches(); + if (ret) { ehca_gen_err("Cannot create SLAB caches"); ret = -ENOMEM; goto module_init1; } - if ((ret = ibmebus_register_driver(&ehca_driver))) { + ret = ibmebus_register_driver(&ehca_driver); + if (ret) { ehca_gen_err("Cannot register eHCA device driver"); ret = -EINVAL; goto module_init2; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index add79bd44e39..c1b868b79d67 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -5,6 +5,7 @@ * * Authors: Dietmar Decker <ddecker@de.ibm.com> * Christoph Raisch <raisch@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> * * Copyright (c) 2005 IBM Corporation * @@ -48,17 +49,53 @@ #include "hcp_if.h" #include "hipz_hw.h" +#define NUM_CHUNKS(length, chunk_size) \ + (((length) + (chunk_size - 1)) / (chunk_size)) +/* max number of rpages (per hcall register_rpages) */ +#define MAX_RPAGES 512 + static struct kmem_cache *mr_cache; static struct kmem_cache *mw_cache; +enum ehca_mr_pgsize { + EHCA_MR_PGSIZE4K = 0x1000L, + EHCA_MR_PGSIZE64K = 0x10000L, + EHCA_MR_PGSIZE1M = 0x100000L, + EHCA_MR_PGSIZE16M = 0x1000000L +}; + +extern int ehca_mr_largepage; + +static u32 ehca_encode_hwpage_size(u32 pgsize) +{ + u32 idx = 0; + pgsize >>= 12; + /* + * map mr page size into hw code: + * 0, 1, 2, 3 for 4K, 64K, 1M, 64M + */ + while (!(pgsize & 1)) { + idx++; + pgsize >>= 4; + } + return idx; +} + +static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca) +{ + if (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M) + return EHCA_MR_PGSIZE16M; + return EHCA_MR_PGSIZE4K; +} + static struct ehca_mr *ehca_mr_new(void) { struct ehca_mr *me; me = kmem_cache_zalloc(mr_cache, GFP_KERNEL); - if (me) { + if (me) spin_lock_init(&me->mrlock); - } else + else ehca_gen_err("alloc failed"); return me; @@ -74,9 +111,9 @@ static struct ehca_mw *ehca_mw_new(void) struct ehca_mw *me; me = kmem_cache_zalloc(mw_cache, GFP_KERNEL); - if (me) { + if (me) spin_lock_init(&me->mwlock); - } else + else ehca_gen_err("alloc failed"); return me; @@ -106,11 +143,12 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) goto get_dma_mr_exit0; } - ret = ehca_reg_maxmr(shca, e_maxmr, (u64*)KERNELBASE, + ret = ehca_reg_maxmr(shca, e_maxmr, (u64 *)KERNELBASE, mr_access_flags, e_pd, &e_maxmr->ib.ib_mr.lkey, &e_maxmr->ib.ib_mr.rkey); if (ret) { + ehca_mr_delete(e_maxmr); ib_mr = ERR_PTR(ret); goto get_dma_mr_exit0; } @@ -144,9 +182,6 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); u64 size; - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; - u32 num_pages_mr; - u32 num_pages_4k; /* 4k portion "pages" */ if ((num_phys_buf <= 0) || !phys_buf_array) { ehca_err(pd->device, "bad input values: num_phys_buf=%x " @@ -190,12 +225,6 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, goto reg_phys_mr_exit0; } - /* determine number of MR pages */ - num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size + - PAGE_SIZE - 1) / PAGE_SIZE); - num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); - /* register MR on HCA */ if (ehca_mr_is_maxmr(size, iova_start)) { e_mr->flags |= EHCA_MR_FLAG_MAXMR; @@ -207,13 +236,26 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, goto reg_phys_mr_exit1; } } else { - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_pages = num_pages_mr; - pginfo.num_4k = num_pages_4k; - pginfo.num_phys_buf = num_phys_buf; - pginfo.phys_buf_array = phys_buf_array; - pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) / - EHCA_PAGESIZE); + struct ehca_mr_pginfo pginfo; + u32 num_kpages; + u32 num_hwpages; + u64 hw_pgsize; + + num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size, + PAGE_SIZE); + /* for kernel space we try most possible pgsize */ + hw_pgsize = ehca_get_max_hwpage_size(shca); + num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size, + hw_pgsize); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.hwpage_size = hw_pgsize; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = num_phys_buf; + pginfo.u.phy.phys_buf_array = phys_buf_array; + pginfo.next_hwpage = + ((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize; ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, @@ -240,18 +282,20 @@ reg_phys_mr_exit0: /*----------------------------------------------------------------------*/ -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, - int mr_access_flags, struct ib_udata *udata) +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int mr_access_flags, + struct ib_udata *udata) { struct ib_mr *ib_mr; struct ehca_mr *e_mr; struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_pginfo pginfo; int ret; - u32 num_pages_mr; - u32 num_pages_4k; /* 4k portion "pages" */ + u32 num_kpages; + u32 num_hwpages; + u64 hwpage_size; if (!pd) { ehca_gen_err("bad pd=%p", pd); @@ -289,7 +333,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt e_mr->umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags); if (IS_ERR(e_mr->umem)) { - ib_mr = (void *) e_mr->umem; + ib_mr = (void *)e_mr->umem; goto reg_user_mr_exit1; } @@ -301,23 +345,52 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt } /* determine number of MR pages */ - num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) / - PAGE_SIZE); - num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) / - EHCA_PAGESIZE); + num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE); + /* select proper hw_pgsize */ + if (ehca_mr_largepage && + (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) { + if (length <= EHCA_MR_PGSIZE4K + && PAGE_SIZE == EHCA_MR_PGSIZE4K) + hwpage_size = EHCA_MR_PGSIZE4K; + else if (length <= EHCA_MR_PGSIZE64K) + hwpage_size = EHCA_MR_PGSIZE64K; + else if (length <= EHCA_MR_PGSIZE1M) + hwpage_size = EHCA_MR_PGSIZE1M; + else + hwpage_size = EHCA_MR_PGSIZE16M; + } else + hwpage_size = EHCA_MR_PGSIZE4K; + ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size); +reg_user_mr_fallback: + num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size); /* register MR on HCA */ - pginfo.type = EHCA_MR_PGI_USER; - pginfo.num_pages = num_pages_mr; - pginfo.num_4k = num_pages_4k; - pginfo.region = e_mr->umem; - pginfo.next_4k = e_mr->umem->offset / EHCA_PAGESIZE; - pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk, - (&e_mr->umem->chunk_list), - list); - - ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd, - &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_USER; + pginfo.hwpage_size = hwpage_size; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.u.usr.region = e_mr->umem; + pginfo.next_hwpage = e_mr->umem->offset / hwpage_size; + pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk, + (&e_mr->umem->chunk_list), + list); + + ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, + e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); + if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) { + ehca_warn(pd->device, "failed to register mr " + "with hwpage_size=%lx", hwpage_size); + ehca_info(pd->device, "try to register mr with " + "kpage_size=%lx", PAGE_SIZE); + /* + * this means kpages are not contiguous for a hw page + * try kernel page size as fallback solution + */ + hwpage_size = PAGE_SIZE; + goto reg_user_mr_fallback; + } if (ret) { ib_mr = ERR_PTR(ret); goto reg_user_mr_exit2; @@ -360,9 +433,9 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, struct ehca_pd *new_pd; u32 tmp_lkey, tmp_rkey; unsigned long sl_flags; - u32 num_pages_mr = 0; - u32 num_pages_4k = 0; /* 4k portion "pages" */ - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + u32 num_kpages = 0; + u32 num_hwpages = 0; + struct ehca_mr_pginfo pginfo; u32 cur_pid = current->tgid; if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && @@ -414,7 +487,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, goto rereg_phys_mr_exit0; } if (!phys_buf_array || num_phys_buf <= 0) { - ehca_err(mr->device, "bad input values: mr_rereg_mask=%x" + ehca_err(mr->device, "bad input values mr_rereg_mask=%x" " phys_buf_array=%p num_phys_buf=%x", mr_rereg_mask, phys_buf_array, num_phys_buf); ret = -EINVAL; @@ -438,12 +511,14 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, /* set requested values dependent on rereg request */ spin_lock_irqsave(&e_mr->mrlock, sl_flags); - new_start = e_mr->start; /* new == old address */ - new_size = e_mr->size; /* new == old length */ - new_acl = e_mr->acl; /* new == old access control */ - new_pd = container_of(mr->pd,struct ehca_pd,ib_pd); /*new == old PD*/ + new_start = e_mr->start; + new_size = e_mr->size; + new_acl = e_mr->acl; + new_pd = container_of(mr->pd, struct ehca_pd, ib_pd); if (mr_rereg_mask & IB_MR_REREG_TRANS) { + u64 hw_pgsize = ehca_get_max_hwpage_size(shca); + new_start = iova_start; /* change address */ /* check physical buffer list and calculate size */ ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, @@ -458,17 +533,19 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, ret = -EINVAL; goto rereg_phys_mr_exit1; } - num_pages_mr = ((((u64)new_start % PAGE_SIZE) + new_size + - PAGE_SIZE - 1) / PAGE_SIZE); - num_pages_4k = ((((u64)new_start % EHCA_PAGESIZE) + new_size + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_pages = num_pages_mr; - pginfo.num_4k = num_pages_4k; - pginfo.num_phys_buf = num_phys_buf; - pginfo.phys_buf_array = phys_buf_array; - pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) / - EHCA_PAGESIZE); + num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) + + new_size, PAGE_SIZE); + num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) + + new_size, hw_pgsize); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.hwpage_size = hw_pgsize; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = num_phys_buf; + pginfo.u.phy.phys_buf_array = phys_buf_array; + pginfo.next_hwpage = + ((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize; } if (mr_rereg_mask & IB_MR_REREG_ACCESS) new_acl = mr_access_flags; @@ -510,7 +587,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); u32 cur_pid = current->tgid; unsigned long sl_flags; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && (my_pd->ownpid != cur_pid)) { @@ -536,14 +613,14 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) "hca_hndl=%lx mr_hndl=%lx lkey=%x", h_ret, mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, mr->lkey); - ret = ehca_mrmw_map_hrc_query_mr(h_ret); + ret = ehca2ib_return_code(h_ret); goto query_mr_exit1; } - mr_attr->pd = mr->pd; + mr_attr->pd = mr->pd; mr_attr->device_virt_addr = hipzout.vaddr; - mr_attr->size = hipzout.len; - mr_attr->lkey = hipzout.lkey; - mr_attr->rkey = hipzout.rkey; + mr_attr->size = hipzout.len; + mr_attr->lkey = hipzout.lkey; + mr_attr->rkey = hipzout.rkey; ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags); query_mr_exit1: @@ -596,7 +673,7 @@ int ehca_dereg_mr(struct ib_mr *mr) "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x", h_ret, shca, e_mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, mr->lkey); - ret = ehca_mrmw_map_hrc_free_mr(h_ret); + ret = ehca2ib_return_code(h_ret); goto dereg_mr_exit0; } @@ -622,7 +699,7 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_mw_hipzout_parms hipzout = {{0},0}; + struct ehca_mw_hipzout_parms hipzout; e_mw = ehca_mw_new(); if (!e_mw) { @@ -636,7 +713,7 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx " "shca=%p hca_hndl=%lx mw=%p", h_ret, shca, shca->ipz_hca_handle.handle, e_mw); - ib_mw = ERR_PTR(ehca_mrmw_map_hrc_alloc(h_ret)); + ib_mw = ERR_PTR(ehca2ib_return_code(h_ret)); goto alloc_mw_exit1; } /* successful MW allocation */ @@ -679,7 +756,7 @@ int ehca_dealloc_mw(struct ib_mw *mw) "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx", h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, e_mw->ipz_mw_handle.handle); - return ehca_mrmw_map_hrc_free_mw(h_ret); + return ehca2ib_return_code(h_ret); } /* successful deallocation */ ehca_mw_delete(e_mw); @@ -699,7 +776,8 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, struct ehca_mr *e_fmr; int ret; u32 tmp_lkey, tmp_rkey; - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_pginfo pginfo; + u64 hw_pgsize; /* check other parameters */ if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && @@ -729,8 +807,8 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, ib_fmr = ERR_PTR(-EINVAL); goto alloc_fmr_exit0; } - if (((1 << fmr_attr->page_shift) != EHCA_PAGESIZE) && - ((1 << fmr_attr->page_shift) != PAGE_SIZE)) { + hw_pgsize = ehca_get_max_hwpage_size(shca); + if ((1 << fmr_attr->page_shift) != hw_pgsize) { ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x", fmr_attr->page_shift); ib_fmr = ERR_PTR(-EINVAL); @@ -745,6 +823,11 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, e_fmr->flags |= EHCA_MR_FLAG_FMR; /* register MR on HCA */ + memset(&pginfo, 0, sizeof(pginfo)); + /* + * pginfo.num_hwpages==0, ie register_rpages() will not be called + * but deferred to map_phys_fmr() + */ ret = ehca_reg_mr(shca, e_fmr, NULL, fmr_attr->max_pages * (1 << fmr_attr->page_shift), mr_access_flags, e_pd, &pginfo, @@ -755,6 +838,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, } /* successful */ + e_fmr->hwpage_size = hw_pgsize; e_fmr->fmr_page_size = 1 << fmr_attr->page_shift; e_fmr->fmr_max_pages = fmr_attr->max_pages; e_fmr->fmr_max_maps = fmr_attr->max_maps; @@ -783,7 +867,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr, container_of(fmr->device, struct ehca_shca, ib_device); struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd); - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_pginfo pginfo; u32 tmp_lkey, tmp_rkey; if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { @@ -809,14 +893,18 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr, fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps); } - pginfo.type = EHCA_MR_PGI_FMR; - pginfo.num_pages = list_len; - pginfo.num_4k = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE); - pginfo.page_list = page_list; - pginfo.next_4k = ((iova & (e_fmr->fmr_page_size-1)) / - EHCA_PAGESIZE); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_kpages = list_len; + pginfo.hwpage_size = e_fmr->hwpage_size; + pginfo.num_hwpages = + list_len * e_fmr->fmr_page_size / pginfo.hwpage_size; + pginfo.u.fmr.page_list = page_list; + pginfo.next_hwpage = + (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size; + pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size; - ret = ehca_rereg_mr(shca, e_fmr, (u64*)iova, + ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova, list_len * e_fmr->fmr_page_size, e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey); if (ret) @@ -831,8 +919,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr, map_phys_fmr_exit0: if (ret) ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x " - "iova=%lx", - ret, fmr, page_list, list_len, iova); + "iova=%lx", ret, fmr, page_list, list_len, iova); return ret; } /* end ehca_map_phys_fmr() */ @@ -922,7 +1009,7 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr) "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x", h_ret, e_fmr, shca->ipz_hca_handle.handle, e_fmr->ipz_mr_handle.handle, fmr->lkey); - ret = ehca_mrmw_map_hrc_free_mr(h_ret); + ret = ehca2ib_return_code(h_ret); goto free_fmr_exit0; } /* successful deregistration */ @@ -950,12 +1037,12 @@ int ehca_reg_mr(struct ehca_shca *shca, int ret; u64 h_ret; u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); if (ehca_use_hp_mr == 1) - hipz_acl |= 0x00000001; + hipz_acl |= 0x00000001; h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr, (u64)iova_start, size, hipz_acl, @@ -963,7 +1050,7 @@ int ehca_reg_mr(struct ehca_shca *shca, if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx " "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle); - ret = ehca_mrmw_map_hrc_alloc(h_ret); + ret = ehca2ib_return_code(h_ret); goto ehca_reg_mr_exit0; } @@ -974,11 +1061,12 @@ int ehca_reg_mr(struct ehca_shca *shca, goto ehca_reg_mr_exit1; /* successful registration */ - e_mr->num_pages = pginfo->num_pages; - e_mr->num_4k = pginfo->num_4k; - e_mr->start = iova_start; - e_mr->size = size; - e_mr->acl = acl; + e_mr->num_kpages = pginfo->num_kpages; + e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->hwpage_size = pginfo->hwpage_size; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; *lkey = hipzout.lkey; *rkey = hipzout.rkey; return 0; @@ -988,10 +1076,10 @@ ehca_reg_mr_exit1: if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p " "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x " - "pginfo=%p num_pages=%lx num_4k=%lx ret=%x", + "pginfo=%p num_kpages=%lx num_hwpages=%lx ret=%x", h_ret, shca, e_mr, iova_start, size, acl, e_pd, - hipzout.lkey, pginfo, pginfo->num_pages, - pginfo->num_4k, ret); + hipzout.lkey, pginfo, pginfo->num_kpages, + pginfo->num_hwpages, ret); ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, " "not recoverable"); } @@ -999,9 +1087,9 @@ ehca_reg_mr_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " - "num_pages=%lx num_4k=%lx", + "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, - pginfo->num_pages, pginfo->num_4k); + pginfo->num_kpages, pginfo->num_hwpages); return ret; } /* end ehca_reg_mr() */ @@ -1018,6 +1106,9 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, u32 i; u64 *kpage; + if (!pginfo->num_hwpages) /* in case of fmr */ + return 0; + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!kpage) { ehca_err(&shca->ib_device, "kpage alloc failed"); @@ -1025,25 +1116,25 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, goto ehca_reg_mr_rpages_exit0; } - /* max 512 pages per shot */ - for (i = 0; i < ((pginfo->num_4k + 512 - 1) / 512); i++) { + /* max MAX_RPAGES ehca mr pages per register call */ + for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) { - if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) { - rnum = pginfo->num_4k % 512; /* last shot */ + if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { + rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */ if (rnum == 0) - rnum = 512; /* last shot is full */ + rnum = MAX_RPAGES; /* last shot is full */ } else - rnum = 512; + rnum = MAX_RPAGES; + + ret = ehca_set_pagebuf(pginfo, rnum, kpage); + if (ret) { + ehca_err(&shca->ib_device, "ehca_set_pagebuf " + "bad rc, ret=%x rnum=%x kpage=%p", + ret, rnum, kpage); + goto ehca_reg_mr_rpages_exit1; + } if (rnum > 1) { - ret = ehca_set_pagebuf(e_mr, pginfo, rnum, kpage); - if (ret) { - ehca_err(&shca->ib_device, "ehca_set_pagebuf " - "bad rc, ret=%x rnum=%x kpage=%p", - ret, rnum, kpage); - ret = -EFAULT; - goto ehca_reg_mr_rpages_exit1; - } rpage = virt_to_abs(kpage); if (!rpage) { ehca_err(&shca->ib_device, "kpage=%p i=%x", @@ -1051,21 +1142,15 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, ret = -EFAULT; goto ehca_reg_mr_rpages_exit1; } - } else { /* rnum==1 */ - ret = ehca_set_pagebuf_1(e_mr, pginfo, &rpage); - if (ret) { - ehca_err(&shca->ib_device, "ehca_set_pagebuf_1 " - "bad rc, ret=%x i=%x", ret, i); - ret = -EFAULT; - goto ehca_reg_mr_rpages_exit1; - } - } + } else + rpage = *kpage; - h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr, - 0, /* pagesize 4k */ - 0, rpage, rnum); + h_ret = hipz_h_register_rpage_mr( + shca->ipz_hca_handle, e_mr, + ehca_encode_hwpage_size(pginfo->hwpage_size), + 0, rpage, rnum); - if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) { + if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { /* * check for 'registration complete'==H_SUCCESS * and for 'page registered'==H_PAGE_REGISTERED @@ -1078,7 +1163,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, e_mr->ib.ib_mr.lkey); - ret = ehca_mrmw_map_hrc_rrpg_last(h_ret); + ret = ehca2ib_return_code(h_ret); break; } else ret = 0; @@ -1089,7 +1174,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, e_mr->ib.ib_mr.lkey, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle); - ret = ehca_mrmw_map_hrc_rrpg_notlast(h_ret); + ret = ehca2ib_return_code(h_ret); break; } else ret = 0; @@ -1101,8 +1186,8 @@ ehca_reg_mr_rpages_exit1: ehca_reg_mr_rpages_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p " - "num_pages=%lx num_4k=%lx", ret, shca, e_mr, pginfo, - pginfo->num_pages, pginfo->num_4k); + "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr, + pginfo, pginfo->num_kpages, pginfo->num_hwpages); return ret; } /* end ehca_reg_mr_rpages() */ @@ -1124,10 +1209,10 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, u64 *kpage; u64 rpage; struct ehca_mr_pginfo pginfo_save; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!kpage) { @@ -1137,12 +1222,12 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, } pginfo_save = *pginfo; - ret = ehca_set_pagebuf(e_mr, pginfo, pginfo->num_4k, kpage); + ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage); if (ret) { ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p " - "pginfo=%p type=%x num_pages=%lx num_4k=%lx kpage=%p", - e_mr, pginfo, pginfo->type, pginfo->num_pages, - pginfo->num_4k,kpage); + "pginfo=%p type=%x num_kpages=%lx num_hwpages=%lx " + "kpage=%p", e_mr, pginfo, pginfo->type, + pginfo->num_kpages, pginfo->num_hwpages, kpage); goto ehca_rereg_mr_rereg1_exit1; } rpage = virt_to_abs(kpage); @@ -1164,7 +1249,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr); *pginfo = pginfo_save; ret = -EAGAIN; - } else if ((u64*)hipzout.vaddr != iova_start) { + } else if ((u64 *)hipzout.vaddr != iova_start) { ehca_err(&shca->ib_device, "PHYP changed iova_start in " "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p " "mr_handle=%lx lkey=%x lkey_out=%x", iova_start, @@ -1176,11 +1261,12 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, * successful reregistration * note: start and start_out are identical for eServer HCAs */ - e_mr->num_pages = pginfo->num_pages; - e_mr->num_4k = pginfo->num_4k; - e_mr->start = iova_start; - e_mr->size = size; - e_mr->acl = acl; + e_mr->num_kpages = pginfo->num_kpages; + e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->hwpage_size = pginfo->hwpage_size; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; *lkey = hipzout.lkey; *rkey = hipzout.rkey; } @@ -1190,9 +1276,9 @@ ehca_rereg_mr_rereg1_exit1: ehca_rereg_mr_rereg1_exit0: if ( ret && (ret != -EAGAIN) ) ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x " - "pginfo=%p num_pages=%lx num_4k=%lx", - ret, *lkey, *rkey, pginfo, pginfo->num_pages, - pginfo->num_4k); + "pginfo=%p num_kpages=%lx num_hwpages=%lx", + ret, *lkey, *rkey, pginfo, pginfo->num_kpages, + pginfo->num_hwpages); return ret; } /* end ehca_rereg_mr_rereg1() */ @@ -1214,10 +1300,12 @@ int ehca_rereg_mr(struct ehca_shca *shca, int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */ /* first determine reregistration hCall(s) */ - if ((pginfo->num_4k > 512) || (e_mr->num_4k > 512) || - (pginfo->num_4k > e_mr->num_4k)) { - ehca_dbg(&shca->ib_device, "Rereg3 case, pginfo->num_4k=%lx " - "e_mr->num_4k=%x", pginfo->num_4k, e_mr->num_4k); + if ((pginfo->num_hwpages > MAX_RPAGES) || + (e_mr->num_hwpages > MAX_RPAGES) || + (pginfo->num_hwpages > e_mr->num_hwpages)) { + ehca_dbg(&shca->ib_device, "Rereg3 case, " + "pginfo->num_hwpages=%lx e_mr->num_hwpages=%x", + pginfo->num_hwpages, e_mr->num_hwpages); rereg_1_hcall = 0; rereg_3_hcall = 1; } @@ -1253,7 +1341,7 @@ int ehca_rereg_mr(struct ehca_shca *shca, h_ret, e_mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, e_mr->ib.ib_mr.lkey); - ret = ehca_mrmw_map_hrc_free_mr(h_ret); + ret = ehca2ib_return_code(h_ret); goto ehca_rereg_mr_exit0; } /* clean ehca_mr_t, without changing struct ib_mr and lock */ @@ -1262,13 +1350,14 @@ int ehca_rereg_mr(struct ehca_shca *shca, /* set some MR values */ e_mr->flags = save_mr.flags; + e_mr->hwpage_size = save_mr.hwpage_size; e_mr->fmr_page_size = save_mr.fmr_page_size; e_mr->fmr_max_pages = save_mr.fmr_max_pages; e_mr->fmr_max_maps = save_mr.fmr_max_maps; e_mr->fmr_map_cnt = save_mr.fmr_map_cnt; ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl, - e_pd, pginfo, lkey, rkey); + e_pd, pginfo, lkey, rkey); if (ret) { u32 offset = (u64)(&e_mr->flags) - (u64)e_mr; memcpy(&e_mr->flags, &(save_mr.flags), @@ -1281,9 +1370,9 @@ ehca_rereg_mr_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " - "num_pages=%lx lkey=%x rkey=%x rereg_1_hcall=%x " + "num_kpages=%lx lkey=%x rkey=%x rereg_1_hcall=%x " "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, - acl, e_pd, pginfo, pginfo->num_pages, *lkey, *rkey, + acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey, rereg_1_hcall, rereg_3_hcall); return ret; } /* end ehca_rereg_mr() */ @@ -1295,97 +1384,84 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca, { int ret = 0; u64 h_ret; - int rereg_1_hcall = 1; /* 1: use hipz_mr_reregister directly */ - int rereg_3_hcall = 0; /* 1: use 3 hipz calls for unmapping */ struct ehca_pd *e_pd = container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd); struct ehca_mr save_fmr; u32 tmp_lkey, tmp_rkey; - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; - - /* first check if reregistration hCall can be used for unmap */ - if (e_fmr->fmr_max_pages > 512) { - rereg_1_hcall = 0; - rereg_3_hcall = 1; - } + struct ehca_mr_pginfo pginfo; + struct ehca_mr_hipzout_parms hipzout; + struct ehca_mr save_mr; - if (rereg_1_hcall) { + if (e_fmr->fmr_max_pages <= MAX_RPAGES) { /* * note: after using rereg hcall with len=0, * rereg hcall must be used again for registering pages */ h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0, 0, 0, e_pd->fw_pd, 0, &hipzout); - if (h_ret != H_SUCCESS) { - /* - * should not happen, because length checked above, - * FMRs are not shared and no MW bound to FMRs - */ - ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " - "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx " - "mr_hndl=%lx lkey=%x lkey_out=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, - e_fmr->ib.ib_fmr.lkey, hipzout.lkey); - rereg_3_hcall = 1; - } else { + if (h_ret == H_SUCCESS) { /* successful reregistration */ e_fmr->start = NULL; e_fmr->size = 0; tmp_lkey = hipzout.lkey; tmp_rkey = hipzout.rkey; + return 0; } + /* + * should not happen, because length checked above, + * FMRs are not shared and no MW bound to FMRs + */ + ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " + "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx " + "mr_hndl=%lx lkey=%x lkey_out=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey, hipzout.lkey); + /* try free and rereg */ } - if (rereg_3_hcall) { - struct ehca_mr save_mr; - - /* first free old FMR */ - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_free_mr failed, " - "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx " - "lkey=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, - e_fmr->ib.ib_fmr.lkey); - ret = ehca_mrmw_map_hrc_free_mr(h_ret); - goto ehca_unmap_one_fmr_exit0; - } - /* clean ehca_mr_t, without changing lock */ - save_fmr = *e_fmr; - ehca_mr_deletenew(e_fmr); - - /* set some MR values */ - e_fmr->flags = save_fmr.flags; - e_fmr->fmr_page_size = save_fmr.fmr_page_size; - e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; - e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; - e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; - e_fmr->acl = save_fmr.acl; - - pginfo.type = EHCA_MR_PGI_FMR; - pginfo.num_pages = 0; - pginfo.num_4k = 0; - ret = ehca_reg_mr(shca, e_fmr, NULL, - (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), - e_fmr->acl, e_pd, &pginfo, &tmp_lkey, - &tmp_rkey); - if (ret) { - u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; - memcpy(&e_fmr->flags, &(save_mr.flags), - sizeof(struct ehca_mr) - offset); - goto ehca_unmap_one_fmr_exit0; - } + /* first free old FMR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_free_mr failed, " + "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx " + "lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey); + ret = ehca2ib_return_code(h_ret); + goto ehca_unmap_one_fmr_exit0; + } + /* clean ehca_mr_t, without changing lock */ + save_fmr = *e_fmr; + ehca_mr_deletenew(e_fmr); + + /* set some MR values */ + e_fmr->flags = save_fmr.flags; + e_fmr->hwpage_size = save_fmr.hwpage_size; + e_fmr->fmr_page_size = save_fmr.fmr_page_size; + e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; + e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; + e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; + e_fmr->acl = save_fmr.acl; + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_FMR; + ret = ehca_reg_mr(shca, e_fmr, NULL, + (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, + &tmp_rkey); + if (ret) { + u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; + memcpy(&e_fmr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); } ehca_unmap_one_fmr_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x " - "fmr_max_pages=%x rereg_1_hcall=%x rereg_3_hcall=%x", - ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages, - rereg_1_hcall, rereg_3_hcall); + "fmr_max_pages=%x", + ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages); return ret; } /* end ehca_unmap_one_fmr() */ @@ -1403,10 +1479,10 @@ int ehca_reg_smr(struct ehca_shca *shca, int ret = 0; u64 h_ret; u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, (u64)iova_start, hipz_acl, e_pd->fw_pd, @@ -1419,15 +1495,16 @@ int ehca_reg_smr(struct ehca_shca *shca, shca->ipz_hca_handle.handle, e_origmr->ipz_mr_handle.handle, e_origmr->ib.ib_mr.lkey); - ret = ehca_mrmw_map_hrc_reg_smr(h_ret); + ret = ehca2ib_return_code(h_ret); goto ehca_reg_smr_exit0; } /* successful registration */ - e_newmr->num_pages = e_origmr->num_pages; - e_newmr->num_4k = e_origmr->num_4k; - e_newmr->start = iova_start; - e_newmr->size = e_origmr->size; - e_newmr->acl = acl; + e_newmr->num_kpages = e_origmr->num_kpages; + e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->hwpage_size = e_origmr->hwpage_size; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; e_newmr->ipz_mr_handle = hipzout.handle; *lkey = hipzout.lkey; *rkey = hipzout.rkey; @@ -1453,10 +1530,11 @@ int ehca_reg_internal_maxmr( struct ehca_mr *e_mr; u64 *iova_start; u64 size_maxmr; - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_pginfo pginfo; struct ib_phys_buf ib_pbuf; - u32 num_pages_mr; - u32 num_pages_4k; /* 4k portion "pages" */ + u32 num_kpages; + u32 num_hwpages; + u64 hw_pgsize; e_mr = ehca_mr_new(); if (!e_mr) { @@ -1468,28 +1546,31 @@ int ehca_reg_internal_maxmr( /* register internal max-MR on HCA */ size_maxmr = (u64)high_memory - PAGE_OFFSET; - iova_start = (u64*)KERNELBASE; + iova_start = (u64 *)KERNELBASE; ib_pbuf.addr = 0; ib_pbuf.size = size_maxmr; - num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size_maxmr + - PAGE_SIZE - 1) / PAGE_SIZE); - num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size_maxmr + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); - - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_pages = num_pages_mr; - pginfo.num_4k = num_pages_4k; - pginfo.num_phys_buf = 1; - pginfo.phys_buf_array = &ib_pbuf; + num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, + PAGE_SIZE); + hw_pgsize = ehca_get_max_hwpage_size(shca); + num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr, + hw_pgsize); + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.hwpage_size = hw_pgsize; + pginfo.u.phy.num_phys_buf = 1; + pginfo.u.phy.phys_buf_array = &ib_pbuf; ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); if (ret) { ehca_err(&shca->ib_device, "reg of internal max MR failed, " - "e_mr=%p iova_start=%p size_maxmr=%lx num_pages_mr=%x " - "num_pages_4k=%x", e_mr, iova_start, size_maxmr, - num_pages_mr, num_pages_4k); + "e_mr=%p iova_start=%p size_maxmr=%lx num_kpages=%x " + "num_hwpages=%x", e_mr, iova_start, size_maxmr, + num_kpages, num_hwpages); goto ehca_reg_internal_maxmr_exit1; } @@ -1524,10 +1605,10 @@ int ehca_reg_maxmr(struct ehca_shca *shca, u64 h_ret; struct ehca_mr *e_origmr = shca->maxmr; u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, (u64)iova_start, hipz_acl, e_pd->fw_pd, @@ -1538,14 +1619,15 @@ int ehca_reg_maxmr(struct ehca_shca *shca, h_ret, e_origmr, shca->ipz_hca_handle.handle, e_origmr->ipz_mr_handle.handle, e_origmr->ib.ib_mr.lkey); - return ehca_mrmw_map_hrc_reg_smr(h_ret); + return ehca2ib_return_code(h_ret); } /* successful registration */ - e_newmr->num_pages = e_origmr->num_pages; - e_newmr->num_4k = e_origmr->num_4k; - e_newmr->start = iova_start; - e_newmr->size = e_origmr->size; - e_newmr->acl = acl; + e_newmr->num_kpages = e_origmr->num_kpages; + e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->hwpage_size = e_origmr->hwpage_size; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; e_newmr->ipz_mr_handle = hipzout.handle; *lkey = hipzout.lkey; *rkey = hipzout.rkey; @@ -1677,299 +1759,352 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, /*----------------------------------------------------------------------*/ -/* setup page buffer from page info */ -int ehca_set_pagebuf(struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) +/* PAGE_SIZE >= pginfo->hwpage_size */ +static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) { int ret = 0; struct ib_umem_chunk *prev_chunk; struct ib_umem_chunk *chunk; - struct ib_phys_buf *pbuf; - u64 *fmrlist; - u64 num4k, pgaddr, offs4k; + u64 pgaddr; u32 i = 0; u32 j = 0; - - if (pginfo->type == EHCA_MR_PGI_PHYS) { - /* loop over desired phys_buf_array entries */ - while (i < number) { - pbuf = pginfo->phys_buf_array + pginfo->next_buf; - num4k = ((pbuf->addr % EHCA_PAGESIZE) + pbuf->size + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE; - offs4k = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; - while (pginfo->next_4k < offs4k + num4k) { - /* sanity check */ - if ((pginfo->page_cnt >= pginfo->num_pages) || - (pginfo->page_4k_cnt >= pginfo->num_4k)) { - ehca_gen_err("page_cnt >= num_pages, " - "page_cnt=%lx " - "num_pages=%lx " - "page_4k_cnt=%lx " - "num_4k=%lx i=%x", - pginfo->page_cnt, - pginfo->num_pages, - pginfo->page_4k_cnt, - pginfo->num_4k, i); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; - } - *kpage = phys_to_abs( - (pbuf->addr & EHCA_PAGEMASK) - + (pginfo->next_4k * EHCA_PAGESIZE)); - if ( !(*kpage) && pbuf->addr ) { - ehca_gen_err("pbuf->addr=%lx " - "pbuf->size=%lx " - "next_4k=%lx", pbuf->addr, - pbuf->size, - pginfo->next_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; - } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - if (pginfo->next_4k % - (PAGE_SIZE / EHCA_PAGESIZE) == 0) - (pginfo->page_cnt)++; - kpage++; - i++; - if (i >= number) break; + int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size; + + /* loop over desired chunk entries */ + chunk = pginfo->u.usr.next_chunk; + prev_chunk = pginfo->u.usr.next_chunk; + list_for_each_entry_continue( + chunk, (&(pginfo->u.usr.region->chunk_list)), list) { + for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { + pgaddr = page_to_pfn(chunk->page_list[i].page) + << PAGE_SHIFT ; + *kpage = phys_to_abs(pgaddr + + (pginfo->next_hwpage * + pginfo->hwpage_size)); + if ( !(*kpage) ) { + ehca_gen_err("pgaddr=%lx " + "chunk->page_list[i]=%lx " + "i=%x next_hwpage=%lx", + pgaddr, (u64)sg_dma_address( + &chunk->page_list[i]), + i, pginfo->next_hwpage); + return -EFAULT; } - if (pginfo->next_4k >= offs4k + num4k) { - (pginfo->next_buf)++; - pginfo->next_4k = 0; + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + kpage++; + if (pginfo->next_hwpage % hwpages_per_kpage == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.usr.next_nmap)++; + pginfo->next_hwpage = 0; + i++; } + j++; + if (j >= number) break; } - } else if (pginfo->type == EHCA_MR_PGI_USER) { - /* loop over desired chunk entries */ - chunk = pginfo->next_chunk; - prev_chunk = pginfo->next_chunk; - list_for_each_entry_continue(chunk, - (&(pginfo->region->chunk_list)), - list) { - for (i = pginfo->next_nmap; i < chunk->nmap; ) { + if ((pginfo->u.usr.next_nmap >= chunk->nmap) && + (j >= number)) { + pginfo->u.usr.next_nmap = 0; + prev_chunk = chunk; + break; + } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { + pginfo->u.usr.next_nmap = 0; + prev_chunk = chunk; + } else if (j >= number) + break; + else + prev_chunk = chunk; + } + pginfo->u.usr.next_chunk = + list_prepare_entry(prev_chunk, + (&(pginfo->u.usr.region->chunk_list)), + list); + return ret; +} + +/* + * check given pages for contiguous layout + * last page addr is returned in prev_pgaddr for further check + */ +static int ehca_check_kpages_per_ate(struct scatterlist *page_list, + int start_idx, int end_idx, + u64 *prev_pgaddr) +{ + int t; + for (t = start_idx; t <= end_idx; t++) { + u64 pgaddr = page_to_pfn(page_list[t].page) << PAGE_SHIFT; + ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr, + *(u64 *)abs_to_virt(phys_to_abs(pgaddr))); + if (pgaddr - PAGE_SIZE != *prev_pgaddr) { + ehca_gen_err("uncontiguous page found pgaddr=%lx " + "prev_pgaddr=%lx page_list_i=%x", + pgaddr, *prev_pgaddr, t); + return -EINVAL; + } + *prev_pgaddr = pgaddr; + } + return 0; +} + +/* PAGE_SIZE < pginfo->hwpage_size */ +static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + struct ib_umem_chunk *prev_chunk; + struct ib_umem_chunk *chunk; + u64 pgaddr, prev_pgaddr; + u32 i = 0; + u32 j = 0; + int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; + int nr_kpages = kpages_per_hwpage; + + /* loop over desired chunk entries */ + chunk = pginfo->u.usr.next_chunk; + prev_chunk = pginfo->u.usr.next_chunk; + list_for_each_entry_continue( + chunk, (&(pginfo->u.usr.region->chunk_list)), list) { + for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { + if (nr_kpages == kpages_per_hwpage) { pgaddr = ( page_to_pfn(chunk->page_list[i].page) << PAGE_SHIFT ); - *kpage = phys_to_abs(pgaddr + - (pginfo->next_4k * - EHCA_PAGESIZE)); + *kpage = phys_to_abs(pgaddr); if ( !(*kpage) ) { - ehca_gen_err("pgaddr=%lx " - "chunk->page_list[i]=%lx " - "i=%x next_4k=%lx mr=%p", - pgaddr, - (u64)sg_dma_address( - &chunk-> - page_list[i]), - i, pginfo->next_4k, e_mr); + ehca_gen_err("pgaddr=%lx i=%x", + pgaddr, i); ret = -EFAULT; - goto ehca_set_pagebuf_exit0; + return ret; } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - kpage++; - if (pginfo->next_4k % - (PAGE_SIZE / EHCA_PAGESIZE) == 0) { - (pginfo->page_cnt)++; - (pginfo->next_nmap)++; - pginfo->next_4k = 0; - i++; + /* + * The first page in a hwpage must be aligned; + * the first MR page is exempt from this rule. + */ + if (pgaddr & (pginfo->hwpage_size - 1)) { + if (pginfo->hwpage_cnt) { + ehca_gen_err( + "invalid alignment " + "pgaddr=%lx i=%x " + "mr_pgsize=%lx", + pgaddr, i, + pginfo->hwpage_size); + ret = -EFAULT; + return ret; + } + /* first MR page */ + pginfo->kpage_cnt = + (pgaddr & + (pginfo->hwpage_size - 1)) >> + PAGE_SHIFT; + nr_kpages -= pginfo->kpage_cnt; + *kpage = phys_to_abs( + pgaddr & + ~(pginfo->hwpage_size - 1)); } - j++; - if (j >= number) break; + ehca_gen_dbg("kpage=%lx chunk_page=%lx " + "value=%016lx", *kpage, pgaddr, + *(u64 *)abs_to_virt( + phys_to_abs(pgaddr))); + prev_pgaddr = pgaddr; + i++; + pginfo->kpage_cnt++; + pginfo->u.usr.next_nmap++; + nr_kpages--; + if (!nr_kpages) + goto next_kpage; + continue; } - if ((pginfo->next_nmap >= chunk->nmap) && - (j >= number)) { - pginfo->next_nmap = 0; - prev_chunk = chunk; - break; - } else if (pginfo->next_nmap >= chunk->nmap) { - pginfo->next_nmap = 0; - prev_chunk = chunk; - } else if (j >= number) + if (i + nr_kpages > chunk->nmap) { + ret = ehca_check_kpages_per_ate( + chunk->page_list, i, + chunk->nmap - 1, &prev_pgaddr); + if (ret) return ret; + pginfo->kpage_cnt += chunk->nmap - i; + pginfo->u.usr.next_nmap += chunk->nmap - i; + nr_kpages -= chunk->nmap - i; break; - else - prev_chunk = chunk; - } - pginfo->next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->region->chunk_list)), - list); - } else if (pginfo->type == EHCA_MR_PGI_FMR) { - /* loop over desired page_list entries */ - fmrlist = pginfo->page_list + pginfo->next_listelem; - for (i = 0; i < number; i++) { - *kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) + - pginfo->next_4k * EHCA_PAGESIZE); - if ( !(*kpage) ) { - ehca_gen_err("*fmrlist=%lx fmrlist=%p " - "next_listelem=%lx next_4k=%lx", - *fmrlist, fmrlist, - pginfo->next_listelem, - pginfo->next_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; + + ret = ehca_check_kpages_per_ate(chunk->page_list, i, + i + nr_kpages - 1, + &prev_pgaddr); + if (ret) return ret; + i += nr_kpages; + pginfo->kpage_cnt += nr_kpages; + pginfo->u.usr.next_nmap += nr_kpages; +next_kpage: + nr_kpages = kpages_per_hwpage; + (pginfo->hwpage_cnt)++; kpage++; - if (pginfo->next_4k % - (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) { - (pginfo->page_cnt)++; - (pginfo->next_listelem)++; - fmrlist++; - pginfo->next_4k = 0; - } + j++; + if (j >= number) break; } - } else { - ehca_gen_err("bad pginfo->type=%x", pginfo->type); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; + if ((pginfo->u.usr.next_nmap >= chunk->nmap) && + (j >= number)) { + pginfo->u.usr.next_nmap = 0; + prev_chunk = chunk; + break; + } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { + pginfo->u.usr.next_nmap = 0; + prev_chunk = chunk; + } else if (j >= number) + break; + else + prev_chunk = chunk; } - -ehca_set_pagebuf_exit0: - if (ret) - ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " - "num_4k=%lx next_buf=%lx next_4k=%lx number=%x " - "kpage=%p page_cnt=%lx page_4k_cnt=%lx i=%x " - "next_listelem=%lx region=%p next_chunk=%p " - "next_nmap=%lx", ret, e_mr, pginfo, pginfo->type, - pginfo->num_pages, pginfo->num_4k, - pginfo->next_buf, pginfo->next_4k, number, kpage, - pginfo->page_cnt, pginfo->page_4k_cnt, i, - pginfo->next_listelem, pginfo->region, - pginfo->next_chunk, pginfo->next_nmap); + pginfo->u.usr.next_chunk = + list_prepare_entry(prev_chunk, + (&(pginfo->u.usr.region->chunk_list)), + list); return ret; -} /* end ehca_set_pagebuf() */ - -/*----------------------------------------------------------------------*/ +} -/* setup 1 page from page info page buffer */ -int ehca_set_pagebuf_1(struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo, - u64 *rpage) +int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) { int ret = 0; - struct ib_phys_buf *tmp_pbuf; - u64 *fmrlist; - struct ib_umem_chunk *chunk; - struct ib_umem_chunk *prev_chunk; - u64 pgaddr, num4k, offs4k; - - if (pginfo->type == EHCA_MR_PGI_PHYS) { - /* sanity check */ - if ((pginfo->page_cnt >= pginfo->num_pages) || - (pginfo->page_4k_cnt >= pginfo->num_4k)) { - ehca_gen_err("page_cnt >= num_pages, page_cnt=%lx " - "num_pages=%lx page_4k_cnt=%lx num_4k=%lx", - pginfo->page_cnt, pginfo->num_pages, - pginfo->page_4k_cnt, pginfo->num_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; - } - tmp_pbuf = pginfo->phys_buf_array + pginfo->next_buf; - num4k = ((tmp_pbuf->addr % EHCA_PAGESIZE) + tmp_pbuf->size + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE; - offs4k = (tmp_pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; - *rpage = phys_to_abs((tmp_pbuf->addr & EHCA_PAGEMASK) + - (pginfo->next_4k * EHCA_PAGESIZE)); - if ( !(*rpage) && tmp_pbuf->addr ) { - ehca_gen_err("tmp_pbuf->addr=%lx" - " tmp_pbuf->size=%lx next_4k=%lx", - tmp_pbuf->addr, tmp_pbuf->size, - pginfo->next_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; - } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - if (pginfo->next_4k % (PAGE_SIZE / EHCA_PAGESIZE) == 0) - (pginfo->page_cnt)++; - if (pginfo->next_4k >= offs4k + num4k) { - (pginfo->next_buf)++; - pginfo->next_4k = 0; - } - } else if (pginfo->type == EHCA_MR_PGI_USER) { - chunk = pginfo->next_chunk; - prev_chunk = pginfo->next_chunk; - list_for_each_entry_continue(chunk, - (&(pginfo->region->chunk_list)), - list) { - pgaddr = ( page_to_pfn(chunk->page_list[ - pginfo->next_nmap].page) - << PAGE_SHIFT); - *rpage = phys_to_abs(pgaddr + - (pginfo->next_4k * EHCA_PAGESIZE)); - if ( !(*rpage) ) { - ehca_gen_err("pgaddr=%lx chunk->page_list[]=%lx" - " next_nmap=%lx next_4k=%lx mr=%p", - pgaddr, (u64)sg_dma_address( - &chunk->page_list[ - pginfo-> - next_nmap]), - pginfo->next_nmap, pginfo->next_4k, - e_mr); - ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; - } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - if (pginfo->next_4k % - (PAGE_SIZE / EHCA_PAGESIZE) == 0) { - (pginfo->page_cnt)++; - (pginfo->next_nmap)++; - pginfo->next_4k = 0; + struct ib_phys_buf *pbuf; + u64 num_hw, offs_hw; + u32 i = 0; + + /* loop over desired phys_buf_array entries */ + while (i < number) { + pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf; + num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) + + pbuf->size, pginfo->hwpage_size); + offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) / + pginfo->hwpage_size; + while (pginfo->next_hwpage < offs_hw + num_hw) { + /* sanity check */ + if ((pginfo->kpage_cnt >= pginfo->num_kpages) || + (pginfo->hwpage_cnt >= pginfo->num_hwpages)) { + ehca_gen_err("kpage_cnt >= num_kpages, " + "kpage_cnt=%lx num_kpages=%lx " + "hwpage_cnt=%lx " + "num_hwpages=%lx i=%x", + pginfo->kpage_cnt, + pginfo->num_kpages, + pginfo->hwpage_cnt, + pginfo->num_hwpages, i); + return -EFAULT; } - if (pginfo->next_nmap >= chunk->nmap) { - pginfo->next_nmap = 0; - prev_chunk = chunk; + *kpage = phys_to_abs( + (pbuf->addr & ~(pginfo->hwpage_size - 1)) + + (pginfo->next_hwpage * pginfo->hwpage_size)); + if ( !(*kpage) && pbuf->addr ) { + ehca_gen_err("pbuf->addr=%lx pbuf->size=%lx " + "next_hwpage=%lx", pbuf->addr, + pbuf->size, pginfo->next_hwpage); + return -EFAULT; } - break; + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + if (PAGE_SIZE >= pginfo->hwpage_size) { + if (pginfo->next_hwpage % + (PAGE_SIZE / pginfo->hwpage_size) == 0) + (pginfo->kpage_cnt)++; + } else + pginfo->kpage_cnt += pginfo->hwpage_size / + PAGE_SIZE; + kpage++; + i++; + if (i >= number) break; + } + if (pginfo->next_hwpage >= offs_hw + num_hw) { + (pginfo->u.phy.next_buf)++; + pginfo->next_hwpage = 0; } - pginfo->next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->region->chunk_list)), - list); - } else if (pginfo->type == EHCA_MR_PGI_FMR) { - fmrlist = pginfo->page_list + pginfo->next_listelem; - *rpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) + - pginfo->next_4k * EHCA_PAGESIZE); - if ( !(*rpage) ) { + } + return ret; +} + +int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + u64 *fmrlist; + u32 i; + + /* loop over desired page_list entries */ + fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; + for (i = 0; i < number; i++) { + *kpage = phys_to_abs((*fmrlist & ~(pginfo->hwpage_size - 1)) + + pginfo->next_hwpage * pginfo->hwpage_size); + if ( !(*kpage) ) { ehca_gen_err("*fmrlist=%lx fmrlist=%p " - "next_listelem=%lx next_4k=%lx", - *fmrlist, fmrlist, pginfo->next_listelem, - pginfo->next_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; + "next_listelem=%lx next_hwpage=%lx", + *fmrlist, fmrlist, + pginfo->u.fmr.next_listelem, + pginfo->next_hwpage); + return -EFAULT; } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - if (pginfo->next_4k % - (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) { - (pginfo->page_cnt)++; - (pginfo->next_listelem)++; - pginfo->next_4k = 0; + (pginfo->hwpage_cnt)++; + if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) { + if (pginfo->next_hwpage % + (pginfo->u.fmr.fmr_pgsize / + pginfo->hwpage_size) == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.fmr.next_listelem)++; + fmrlist++; + pginfo->next_hwpage = 0; + } else + (pginfo->next_hwpage)++; + } else { + unsigned int cnt_per_hwpage = pginfo->hwpage_size / + pginfo->u.fmr.fmr_pgsize; + unsigned int j; + u64 prev = *kpage; + /* check if adrs are contiguous */ + for (j = 1; j < cnt_per_hwpage; j++) { + u64 p = phys_to_abs(fmrlist[j] & + ~(pginfo->hwpage_size - 1)); + if (prev + pginfo->u.fmr.fmr_pgsize != p) { + ehca_gen_err("uncontiguous fmr pages " + "found prev=%lx p=%lx " + "idx=%x", prev, p, i + j); + return -EINVAL; + } + prev = p; + } + pginfo->kpage_cnt += cnt_per_hwpage; + pginfo->u.fmr.next_listelem += cnt_per_hwpage; + fmrlist += cnt_per_hwpage; } - } else { + kpage++; + } + return ret; +} + +/* setup page buffer from page info */ +int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret; + + switch (pginfo->type) { + case EHCA_MR_PGI_PHYS: + ret = ehca_set_pagebuf_phys(pginfo, number, kpage); + break; + case EHCA_MR_PGI_USER: + ret = PAGE_SIZE >= pginfo->hwpage_size ? + ehca_set_pagebuf_user1(pginfo, number, kpage) : + ehca_set_pagebuf_user2(pginfo, number, kpage); + break; + case EHCA_MR_PGI_FMR: + ret = ehca_set_pagebuf_fmr(pginfo, number, kpage); + break; + default: ehca_gen_err("bad pginfo->type=%x", pginfo->type); ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; + break; } - -ehca_set_pagebuf_1_exit0: - if (ret) - ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " - "num_4k=%lx next_buf=%lx next_4k=%lx rpage=%p " - "page_cnt=%lx page_4k_cnt=%lx next_listelem=%lx " - "region=%p next_chunk=%p next_nmap=%lx", ret, e_mr, - pginfo, pginfo->type, pginfo->num_pages, - pginfo->num_4k, pginfo->next_buf, pginfo->next_4k, - rpage, pginfo->page_cnt, pginfo->page_4k_cnt, - pginfo->next_listelem, pginfo->region, - pginfo->next_chunk, pginfo->next_nmap); return ret; -} /* end ehca_set_pagebuf_1() */ +} /* end ehca_set_pagebuf() */ /*----------------------------------------------------------------------*/ @@ -1982,7 +2117,7 @@ int ehca_mr_is_maxmr(u64 size, { /* a MR is treated as max-MR only if it fits following: */ if ((size == ((u64)high_memory - PAGE_OFFSET)) && - (iova_start == (void*)KERNELBASE)) { + (iova_start == (void *)KERNELBASE)) { ehca_gen_dbg("this is a max-MR"); return 1; } else @@ -2011,9 +2146,9 @@ void ehca_mrmw_map_acl(int ib_acl, /*----------------------------------------------------------------------*/ /* sets page size in hipz access control for MR/MW. */ -void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl) /*INOUT*/ +void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/ { - return; /* HCA supports only 4k */ + *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24); } /* end ehca_mrmw_set_pgsize_hipz_acl() */ /*----------------------------------------------------------------------*/ @@ -2042,196 +2177,23 @@ void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, /*----------------------------------------------------------------------*/ /* - * map HIPZ rc to IB retcodes for MR/MW allocations - * Used for hipz_mr_reg_alloc and hipz_mw_alloc. - */ -int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* successful completion */ - return 0; - case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ - case H_CONSTRAINED: /* resource constraint */ - case H_NO_MEM: - return -ENOMEM; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_alloc() */ - -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for MR register rpage - * Used for hipz_h_register_rpage_mr at registering last page - */ -int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* registration complete */ - return 0; - case H_PAGE_REGISTERED: /* page registered */ - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ -/* case H_QT_PARM: invalid queue type */ - case H_PARAMETER: /* - * invalid logical address, - * or count zero or greater 512 - */ - case H_TABLE_FULL: /* page table full */ - case H_HARDWARE: /* HCA not operational */ - return -EINVAL; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_rrpg_last() */ - -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for MR register rpage - * Used for hipz_h_register_rpage_mr at registering one page, but not last page - */ -int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_PAGE_REGISTERED: /* page registered */ - return 0; - case H_SUCCESS: /* registration complete */ - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ -/* case H_QT_PARM: invalid queue type */ - case H_PARAMETER: /* - * invalid logical address, - * or count zero or greater 512 - */ - case H_TABLE_FULL: /* page table full */ - case H_HARDWARE: /* HCA not operational */ - return -EINVAL; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_rrpg_notlast() */ - -/*----------------------------------------------------------------------*/ - -/* map HIPZ rc to IB retcodes for MR query. Used for hipz_mr_query. */ -int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* successful completion */ - return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ - return -EINVAL; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_query_mr() */ - -/*----------------------------------------------------------------------*/ -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for freeing MR resource - * Used for hipz_h_free_resource_mr - */ -int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* resource freed */ - return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ - case H_R_STATE: /* invalid resource state */ - case H_HARDWARE: /* HCA not operational */ - return -EINVAL; - case H_RESOURCE: /* Resource in use */ - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_free_mr() */ - -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for freeing MW resource - * Used for hipz_h_free_resource_mw - */ -int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* resource freed */ - return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ - case H_R_STATE: /* invalid resource state */ - case H_HARDWARE: /* HCA not operational */ - return -EINVAL; - case H_RESOURCE: /* Resource in use */ - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_free_mw() */ - -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for SMR registrations - * Used for hipz_h_register_smr. - */ -int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* successful completion */ - return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ - case H_MEM_PARM: /* invalid MR virtual address */ - case H_MEM_ACCESS_PARM: /* invalid access controls */ - case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ - return -EINVAL; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_reg_smr() */ - -/*----------------------------------------------------------------------*/ - -/* * MR destructor and constructor * used in Reregister MR verb, sets all fields in ehca_mr_t to 0, * except struct ib_mr and spinlock */ void ehca_mr_deletenew(struct ehca_mr *mr) { - mr->flags = 0; - mr->num_pages = 0; - mr->num_4k = 0; - mr->acl = 0; - mr->start = NULL; + mr->flags = 0; + mr->num_kpages = 0; + mr->num_hwpages = 0; + mr->acl = 0; + mr->start = NULL; mr->fmr_page_size = 0; mr->fmr_max_pages = 0; - mr->fmr_max_maps = 0; - mr->fmr_map_cnt = 0; + mr->fmr_max_maps = 0; + mr->fmr_map_cnt = 0; memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle)); memset(&mr->galpas, 0, sizeof(mr->galpas)); - mr->nr_of_pages = 0; - mr->pagearray = NULL; } /* end ehca_mr_deletenew() */ int ehca_init_mrmw_cache(void) @@ -2239,13 +2201,13 @@ int ehca_init_mrmw_cache(void) mr_cache = kmem_cache_create("ehca_cache_mr", sizeof(struct ehca_mr), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!mr_cache) return -ENOMEM; mw_cache = kmem_cache_create("ehca_cache_mw", sizeof(struct ehca_mw), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!mw_cache) { kmem_cache_destroy(mr_cache); mr_cache = NULL; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h index d936e40a5748..bc8f4e31c123 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.h +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h @@ -101,40 +101,21 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, u64 *page_list, int list_len); -int ehca_set_pagebuf(struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo, +int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, u32 number, u64 *kpage); -int ehca_set_pagebuf_1(struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo, - u64 *rpage); - int ehca_mr_is_maxmr(u64 size, u64 *iova_start); void ehca_mrmw_map_acl(int ib_acl, u32 *hipz_acl); -void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl); +void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl); void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, int *ib_acl); -int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc); - void ehca_mr_deletenew(struct ehca_mr *mr); #endif /*_EHCA_MRMW_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c index 79d0591a8043..3dafd7ff36cd 100644 --- a/drivers/infiniband/hw/ehca/ehca_pd.c +++ b/drivers/infiniband/hw/ehca/ehca_pd.c @@ -49,6 +49,7 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device, struct ib_ucontext *context, struct ib_udata *udata) { struct ehca_pd *pd; + int i; pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL); if (!pd) { @@ -58,6 +59,11 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device, } pd->ownpid = current->tgid; + for (i = 0; i < 2; i++) { + INIT_LIST_HEAD(&pd->free[i]); + INIT_LIST_HEAD(&pd->full[i]); + } + mutex_init(&pd->lock); /* * Kernel PD: when device = -1, 0 @@ -81,6 +87,9 @@ int ehca_dealloc_pd(struct ib_pd *pd) { u32 cur_pid = current->tgid; struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); + int i, leftovers = 0; + extern struct kmem_cache *small_qp_cache; + struct ipz_small_queue_page *page, *tmp; if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && my_pd->ownpid != cur_pid) { @@ -89,8 +98,20 @@ int ehca_dealloc_pd(struct ib_pd *pd) return -EINVAL; } - kmem_cache_free(pd_cache, - container_of(pd, struct ehca_pd, ib_pd)); + for (i = 0; i < 2; i++) { + list_splice(&my_pd->full[i], &my_pd->free[i]); + list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) { + leftovers = 1; + free_page(page->page); + kmem_cache_free(small_qp_cache, page); + } + } + + if (leftovers) + ehca_warn(pd->device, + "Some small queue pages were not freed"); + + kmem_cache_free(pd_cache, my_pd); return 0; } @@ -100,7 +121,7 @@ int ehca_init_pd_cache(void) pd_cache = kmem_cache_create("ehca_cache_pd", sizeof(struct ehca_pd), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!pd_cache) return -ENOMEM; return 0; diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h index 8707d297ce4c..818803057ebf 100644 --- a/drivers/infiniband/hw/ehca/ehca_qes.h +++ b/drivers/infiniband/hw/ehca/ehca_qes.h @@ -53,13 +53,13 @@ struct ehca_vsgentry { u32 length; }; -#define GRH_FLAG_MASK EHCA_BMASK_IBM(7,7) -#define GRH_IPVERSION_MASK EHCA_BMASK_IBM(0,3) -#define GRH_TCLASS_MASK EHCA_BMASK_IBM(4,12) -#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13,31) -#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32,47) -#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48,55) -#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56,63) +#define GRH_FLAG_MASK EHCA_BMASK_IBM( 7, 7) +#define GRH_IPVERSION_MASK EHCA_BMASK_IBM( 0, 3) +#define GRH_TCLASS_MASK EHCA_BMASK_IBM( 4, 12) +#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31) +#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47) +#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55) +#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63) /* * Unreliable Datagram Address Vector Format @@ -206,10 +206,10 @@ struct ehca_wqe { }; -#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0) -#define WC_IMM_DATA EHCA_BMASK_IBM(1,1) -#define WC_GRH_PRESENT EHCA_BMASK_IBM(2,2) -#define WC_SE_BIT EHCA_BMASK_IBM(3,3) +#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0) +#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1) +#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2) +#define WC_SE_BIT EHCA_BMASK_IBM(3, 3) #define WC_STATUS_ERROR_BIT 0x80000000 #define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 #define WC_STATUS_PURGE_BIT 0x10 diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index b5bc787c77b6..b178cba96345 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -3,7 +3,9 @@ * * QP functions * - * Authors: Waleri Fomin <fomin@de.ibm.com> + * Authors: Joachim Fenkes <fenkes@de.ibm.com> + * Stefan Roscher <stefan.roscher@de.ibm.com> + * Waleri Fomin <fomin@de.ibm.com> * Hoang-Nam Nguyen <hnguyen@de.ibm.com> * Reinhard Ernst <rernst@de.ibm.com> * Heiko J Schick <schickhj@de.ibm.com> @@ -234,13 +236,6 @@ static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate, return index; } -enum ehca_service_type { - ST_RC = 0, - ST_UC = 1, - ST_RD = 2, - ST_UD = 3 -}; - /* * ibqptype2servicetype returns hcp service type corresponding to given * ib qp type used by create_qp() @@ -268,143 +263,169 @@ static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) } /* - * init_qp_queues initializes/constructs r/squeue and registers queue pages. + * init userspace queue info from ipz_queue data + */ +static inline void queue2resp(struct ipzu_queue_resp *resp, + struct ipz_queue *queue) +{ + resp->qe_size = queue->qe_size; + resp->act_nr_of_sg = queue->act_nr_of_sg; + resp->queue_length = queue->queue_length; + resp->pagesize = queue->pagesize; + resp->toggle_state = queue->toggle_state; +} + +/* + * init_qp_queue initializes/constructs r/squeue and registers queue pages. */ -static inline int init_qp_queues(struct ehca_shca *shca, - struct ehca_qp *my_qp, - int nr_sq_pages, - int nr_rq_pages, - int swqe_size, - int rwqe_size, - int nr_send_sges, int nr_receive_sges) +static inline int init_qp_queue(struct ehca_shca *shca, + struct ehca_pd *pd, + struct ehca_qp *my_qp, + struct ipz_queue *queue, + int q_type, + u64 expected_hret, + struct ehca_alloc_queue_parms *parms, + int wqe_size) { - int ret, cnt, ipz_rc; + int ret, cnt, ipz_rc, nr_q_pages; void *vpage; u64 rpage, h_ret; struct ib_device *ib_dev = &shca->ib_device; struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle; - ipz_rc = ipz_queue_ctor(&my_qp->ipz_squeue, - nr_sq_pages, - EHCA_PAGESIZE, swqe_size, nr_send_sges); - if (!ipz_rc) { - ehca_err(ib_dev,"Cannot allocate page for squeue. ipz_rc=%x", - ipz_rc); - return -EBUSY; + if (!parms->queue_size) + return 0; + + if (parms->is_small) { + nr_q_pages = 1; + ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, + 128 << parms->page_size, + wqe_size, parms->act_nr_sges, 1); + } else { + nr_q_pages = parms->queue_size; + ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, + EHCA_PAGESIZE, wqe_size, + parms->act_nr_sges, 0); } - ipz_rc = ipz_queue_ctor(&my_qp->ipz_rqueue, - nr_rq_pages, - EHCA_PAGESIZE, rwqe_size, nr_receive_sges); if (!ipz_rc) { - ehca_err(ib_dev, "Cannot allocate page for rqueue. ipz_rc=%x", + ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%x", ipz_rc); - ret = -EBUSY; - goto init_qp_queues0; + return -EBUSY; } - /* register SQ pages */ - for (cnt = 0; cnt < nr_sq_pages; cnt++) { - vpage = ipz_qpageit_get_inc(&my_qp->ipz_squeue); + + /* register queue pages */ + for (cnt = 0; cnt < nr_q_pages; cnt++) { + vpage = ipz_qpageit_get_inc(queue); if (!vpage) { - ehca_err(ib_dev, "SQ ipz_qpageit_get_inc() " + ehca_err(ib_dev, "ipz_qpageit_get_inc() " "failed p_vpage= %p", vpage); ret = -EINVAL; - goto init_qp_queues1; + goto init_qp_queue1; } rpage = virt_to_abs(vpage); h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, my_qp->ipz_qp_handle, - &my_qp->pf, 0, 0, - rpage, 1, + NULL, 0, q_type, + rpage, parms->is_small ? 0 : 1, my_qp->galpas.kernel); - if (h_ret < H_SUCCESS) { - ehca_err(ib_dev, "SQ hipz_qp_register_rpage()" - " failed rc=%lx", h_ret); - ret = ehca2ib_return_code(h_ret); - goto init_qp_queues1; - } - } - - ipz_qeit_reset(&my_qp->ipz_squeue); - - /* register RQ pages */ - for (cnt = 0; cnt < nr_rq_pages; cnt++) { - vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); - if (!vpage) { - ehca_err(ib_dev, "RQ ipz_qpageit_get_inc() " - "failed p_vpage = %p", vpage); - ret = -EINVAL; - goto init_qp_queues1; - } - - rpage = virt_to_abs(vpage); - - h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, 0, 1, - rpage, 1,my_qp->galpas.kernel); - if (h_ret < H_SUCCESS) { - ehca_err(ib_dev, "RQ hipz_qp_register_rpage() failed " - "rc=%lx", h_ret); - ret = ehca2ib_return_code(h_ret); - goto init_qp_queues1; - } - if (cnt == (nr_rq_pages - 1)) { /* last page! */ - if (h_ret != H_SUCCESS) { - ehca_err(ib_dev, "RQ hipz_qp_register_rpage() " + if (cnt == (nr_q_pages - 1)) { /* last page! */ + if (h_ret != expected_hret) { + ehca_err(ib_dev, "hipz_qp_register_rpage() " "h_ret= %lx ", h_ret); ret = ehca2ib_return_code(h_ret); - goto init_qp_queues1; + goto init_qp_queue1; } vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); if (vpage) { ehca_err(ib_dev, "ipz_qpageit_get_inc() " "should not succeed vpage=%p", vpage); ret = -EINVAL; - goto init_qp_queues1; + goto init_qp_queue1; } } else { if (h_ret != H_PAGE_REGISTERED) { - ehca_err(ib_dev, "RQ hipz_qp_register_rpage() " + ehca_err(ib_dev, "hipz_qp_register_rpage() " "h_ret= %lx ", h_ret); ret = ehca2ib_return_code(h_ret); - goto init_qp_queues1; + goto init_qp_queue1; } } } - ipz_qeit_reset(&my_qp->ipz_rqueue); + ipz_qeit_reset(queue); return 0; -init_qp_queues1: - ipz_queue_dtor(&my_qp->ipz_rqueue); -init_qp_queues0: - ipz_queue_dtor(&my_qp->ipz_squeue); +init_qp_queue1: + ipz_queue_dtor(pd, queue); return ret; } -struct ib_qp *ehca_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp) +{ + if (is_llqp) + return 128 << act_nr_sge; + else + return offsetof(struct ehca_wqe, + u.nud.sg_list[act_nr_sge]); +} + +static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue, + int req_nr_sge, int is_llqp) +{ + u32 wqe_size, q_size; + int act_nr_sge = req_nr_sge; + + if (!is_llqp) + /* round up #SGEs so WQE size is a power of 2 */ + for (act_nr_sge = 4; act_nr_sge <= 252; + act_nr_sge = 4 + 2 * act_nr_sge) + if (act_nr_sge >= req_nr_sge) + break; + + wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp); + q_size = wqe_size * (queue->max_wr + 1); + + if (q_size <= 512) + queue->page_size = 2; + else if (q_size <= 1024) + queue->page_size = 3; + else + queue->page_size = 0; + + queue->is_small = (queue->page_size != 0); +} + +/* + * Create an ib_qp struct that is either a QP or an SRQ, depending on + * the value of the is_srq parameter. If init_attr and srq_init_attr share + * fields, the field out of init_attr is used. + */ +static struct ehca_qp *internal_create_qp( + struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata, int is_srq) { - static int da_rc_msg_size[]={ 128, 256, 512, 1024, 2048, 4096 }; - static int da_ud_sq_msg_size[]={ 128, 384, 896, 1920, 3968 }; struct ehca_qp *my_qp; struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); struct ib_ucontext *context = NULL; u64 h_ret; - int max_send_sge, max_recv_sge, ret; + int is_llqp = 0, has_srq = 0; + int qp_type, max_send_sge, max_recv_sge, ret; /* h_call's out parameters */ struct ehca_alloc_qp_parms parms; - u32 swqe_size = 0, rwqe_size = 0; - u8 daqp_completion, isdaqp; + u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; unsigned long flags; + memset(&parms, 0, sizeof(parms)); + qp_type = init_attr->qp_type; + if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR && init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", @@ -412,41 +433,98 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } - /* save daqp completion bits */ - daqp_completion = init_attr->qp_type & 0x60; - /* save daqp bit */ - isdaqp = (init_attr->qp_type & 0x80) ? 1 : 0; - init_attr->qp_type = init_attr->qp_type & 0x1F; + /* save LLQP info */ + if (qp_type & 0x80) { + is_llqp = 1; + parms.ext_type = EQPT_LLQP; + parms.ll_comp_flags = qp_type & LLQP_COMP_MASK; + } + qp_type &= 0x1F; + init_attr->qp_type &= 0x1F; - if (init_attr->qp_type != IB_QPT_UD && - init_attr->qp_type != IB_QPT_SMI && - init_attr->qp_type != IB_QPT_GSI && - init_attr->qp_type != IB_QPT_UC && - init_attr->qp_type != IB_QPT_RC) { - ehca_err(pd->device, "wrong QP Type=%x", init_attr->qp_type); - return ERR_PTR(-EINVAL); + /* handle SRQ base QPs */ + if (init_attr->srq) { + struct ehca_qp *my_srq = + container_of(init_attr->srq, struct ehca_qp, ib_srq); + + has_srq = 1; + parms.ext_type = EQPT_SRQBASE; + parms.srq_qpn = my_srq->real_qp_num; + parms.srq_token = my_srq->token; } - if ((init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD) - && isdaqp) { - ehca_err(pd->device, "unsupported LL QP Type=%x", - init_attr->qp_type); + + if (is_llqp && has_srq) { + ehca_err(pd->device, "LLQPs can't have an SRQ"); return ERR_PTR(-EINVAL); - } else if (init_attr->qp_type == IB_QPT_RC && isdaqp && - (init_attr->cap.max_send_wr > 255 || - init_attr->cap.max_recv_wr > 255 )) { - ehca_err(pd->device, "Invalid Number of max_sq_wr =%x " - "or max_rq_wr=%x for QP Type=%x", - init_attr->cap.max_send_wr, - init_attr->cap.max_recv_wr,init_attr->qp_type); - return ERR_PTR(-EINVAL); - } else if (init_attr->qp_type == IB_QPT_UD && isdaqp && - init_attr->cap.max_send_wr > 255) { - ehca_err(pd->device, - "Invalid Number of max_send_wr=%x for UD QP_TYPE=%x", - init_attr->cap.max_send_wr, init_attr->qp_type); + } + + /* handle SRQs */ + if (is_srq) { + parms.ext_type = EQPT_SRQ; + parms.srq_limit = srq_init_attr->attr.srq_limit; + if (init_attr->cap.max_recv_sge > 3) { + ehca_err(pd->device, "no more than three SGEs " + "supported for SRQ pd=%p max_sge=%x", + pd, init_attr->cap.max_recv_sge); + return ERR_PTR(-EINVAL); + } + } + + /* check QP type */ + if (qp_type != IB_QPT_UD && + qp_type != IB_QPT_UC && + qp_type != IB_QPT_RC && + qp_type != IB_QPT_SMI && + qp_type != IB_QPT_GSI) { + ehca_err(pd->device, "wrong QP Type=%x", qp_type); return ERR_PTR(-EINVAL); } + if (is_llqp) { + switch (qp_type) { + case IB_QPT_RC: + if ((init_attr->cap.max_send_wr > 255) || + (init_attr->cap.max_recv_wr > 255)) { + ehca_err(pd->device, + "Invalid Number of max_sq_wr=%x " + "or max_rq_wr=%x for RC LLQP", + init_attr->cap.max_send_wr, + init_attr->cap.max_recv_wr); + return ERR_PTR(-EINVAL); + } + break; + case IB_QPT_UD: + if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { + ehca_err(pd->device, "UD LLQP not supported " + "by this adapter"); + return ERR_PTR(-ENOSYS); + } + if (!(init_attr->cap.max_send_sge <= 5 + && init_attr->cap.max_send_sge >= 1 + && init_attr->cap.max_recv_sge <= 5 + && init_attr->cap.max_recv_sge >= 1)) { + ehca_err(pd->device, + "Invalid Number of max_send_sge=%x " + "or max_recv_sge=%x for UD LLQP", + init_attr->cap.max_send_sge, + init_attr->cap.max_recv_sge); + return ERR_PTR(-EINVAL); + } else if (init_attr->cap.max_send_wr > 255) { + ehca_err(pd->device, + "Invalid Number of " + "ax_send_wr=%x for UD QP_TYPE=%x", + init_attr->cap.max_send_wr, qp_type); + return ERR_PTR(-EINVAL); + } + break; + default: + ehca_err(pd->device, "unsupported LL QP Type=%x", + qp_type); + return ERR_PTR(-EINVAL); + break; + } + } + if (pd->uobject && udata) context = pd->uobject->context; @@ -456,16 +534,17 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, return ERR_PTR(-ENOMEM); } - memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms)); spin_lock_init(&my_qp->spinlock_s); spin_lock_init(&my_qp->spinlock_r); + my_qp->qp_type = qp_type; + my_qp->ext_type = parms.ext_type; - my_qp->recv_cq = - container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); - my_qp->send_cq = - container_of(init_attr->send_cq, struct ehca_cq, ib_cq); - - my_qp->init_attr = *init_attr; + if (init_attr->recv_cq) + my_qp->recv_cq = + container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); + if (init_attr->send_cq) + my_qp->send_cq = + container_of(init_attr->send_cq, struct ehca_cq, ib_cq); do { if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) { @@ -474,9 +553,9 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, goto create_qp_exit0; } - spin_lock_irqsave(&ehca_qp_idr_lock, flags); + write_lock_irqsave(&ehca_qp_idr_lock, flags); ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); } while (ret == -EAGAIN); @@ -486,10 +565,10 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, goto create_qp_exit0; } - parms.servicetype = ibqptype2servicetype(init_attr->qp_type); + parms.servicetype = ibqptype2servicetype(qp_type); if (parms.servicetype < 0) { ret = -EINVAL; - ehca_err(pd->device, "Invalid qp_type=%x", init_attr->qp_type); + ehca_err(pd->device, "Invalid qp_type=%x", qp_type); goto create_qp_exit0; } @@ -501,21 +580,35 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, /* UD_AV CIRCUMVENTION */ max_send_sge = init_attr->cap.max_send_sge; max_recv_sge = init_attr->cap.max_recv_sge; - if (IB_QPT_UD == init_attr->qp_type || - IB_QPT_GSI == init_attr->qp_type || - IB_QPT_SMI == init_attr->qp_type) { + if (parms.servicetype == ST_UD && !is_llqp) { max_send_sge += 2; max_recv_sge += 2; } - parms.ipz_eq_handle = shca->eq.ipz_eq_handle; - parms.daqp_ctrl = isdaqp | daqp_completion; + parms.token = my_qp->token; + parms.eq_handle = shca->eq.ipz_eq_handle; parms.pd = my_pd->fw_pd; - parms.max_recv_sge = max_recv_sge; - parms.max_send_sge = max_send_sge; - - h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, my_qp, &parms); - + if (my_qp->send_cq) + parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle; + if (my_qp->recv_cq) + parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle; + + parms.squeue.max_wr = init_attr->cap.max_send_wr; + parms.rqueue.max_wr = init_attr->cap.max_recv_wr; + parms.squeue.max_sge = max_send_sge; + parms.rqueue.max_sge = max_recv_sge; + + if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap) + && !(context && udata)) { /* no small QP support in userspace ATM */ + ehca_determine_small_queue( + &parms.squeue, max_send_sge, is_llqp); + ehca_determine_small_queue( + &parms.rqueue, max_recv_sge, is_llqp); + parms.qp_storage = + (parms.squeue.is_small || parms.rqueue.is_small); + } + + h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms); if (h_ret != H_SUCCESS) { ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx", h_ret); @@ -523,55 +616,38 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, goto create_qp_exit1; } - my_qp->ib_qp.qp_num = my_qp->real_qp_num; + ib_qp_num = my_qp->real_qp_num = parms.real_qp_num; + my_qp->ipz_qp_handle = parms.qp_handle; + my_qp->galpas = parms.galpas; - switch (init_attr->qp_type) { + swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp); + rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp); + + switch (qp_type) { case IB_QPT_RC: - if (isdaqp == 0) { - swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ - (parms.act_nr_send_sges)]); - rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ - (parms.act_nr_recv_sges)]); - } else { /* for daqp we need to use msg size, not wqe size */ - swqe_size = da_rc_msg_size[max_send_sge]; - rwqe_size = da_rc_msg_size[max_recv_sge]; - parms.act_nr_send_sges = 1; - parms.act_nr_recv_sges = 1; + if (is_llqp) { + parms.squeue.act_nr_sges = 1; + parms.rqueue.act_nr_sges = 1; } break; - case IB_QPT_UC: - swqe_size = offsetof(struct ehca_wqe, - u.nud.sg_list[parms.act_nr_send_sges]); - rwqe_size = offsetof(struct ehca_wqe, - u.nud.sg_list[parms.act_nr_recv_sges]); - break; - case IB_QPT_UD: case IB_QPT_GSI: case IB_QPT_SMI: /* UD circumvention */ - parms.act_nr_recv_sges -= 2; - parms.act_nr_send_sges -= 2; - if (isdaqp) { - swqe_size = da_ud_sq_msg_size[max_send_sge]; - rwqe_size = da_rc_msg_size[max_recv_sge]; - parms.act_nr_send_sges = 1; - parms.act_nr_recv_sges = 1; + if (is_llqp) { + parms.squeue.act_nr_sges = 1; + parms.rqueue.act_nr_sges = 1; } else { - swqe_size = offsetof(struct ehca_wqe, - u.ud_av.sg_list[parms.act_nr_send_sges]); - rwqe_size = offsetof(struct ehca_wqe, - u.ud_av.sg_list[parms.act_nr_recv_sges]); + parms.squeue.act_nr_sges -= 2; + parms.rqueue.act_nr_sges -= 2; } - if (IB_QPT_GSI == init_attr->qp_type || - IB_QPT_SMI == init_attr->qp_type) { - parms.act_nr_send_wqes = init_attr->cap.max_send_wr; - parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr; - parms.act_nr_send_sges = init_attr->cap.max_send_sge; - parms.act_nr_recv_sges = init_attr->cap.max_recv_sge; - my_qp->ib_qp.qp_num = - (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1; + if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) { + parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr; + parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr; + parms.squeue.act_nr_sges = init_attr->cap.max_send_sge; + parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge; + ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1; } break; @@ -580,108 +656,234 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, break; } - /* initializes r/squeue and registers queue pages */ - ret = init_qp_queues(shca, my_qp, - parms.nr_sq_pages, parms.nr_rq_pages, - swqe_size, rwqe_size, - parms.act_nr_send_sges, parms.act_nr_recv_sges); - if (ret) { - ehca_err(pd->device, - "Couldn't initialize r/squeue and pages ret=%x", ret); - goto create_qp_exit2; + /* initialize r/squeue and register queue pages */ + if (HAS_SQ(my_qp)) { + ret = init_qp_queue( + shca, my_pd, my_qp, &my_qp->ipz_squeue, 0, + HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS, + &parms.squeue, swqe_size); + if (ret) { + ehca_err(pd->device, "Couldn't initialize squeue " + "and pages ret=%x", ret); + goto create_qp_exit2; + } } - my_qp->ib_qp.pd = &my_pd->ib_pd; - my_qp->ib_qp.device = my_pd->ib_pd.device; + if (HAS_RQ(my_qp)) { + ret = init_qp_queue( + shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1, + H_SUCCESS, &parms.rqueue, rwqe_size); + if (ret) { + ehca_err(pd->device, "Couldn't initialize rqueue " + "and pages ret=%x", ret); + goto create_qp_exit3; + } + } + + if (is_srq) { + my_qp->ib_srq.pd = &my_pd->ib_pd; + my_qp->ib_srq.device = my_pd->ib_pd.device; - my_qp->ib_qp.recv_cq = init_attr->recv_cq; - my_qp->ib_qp.send_cq = init_attr->send_cq; + my_qp->ib_srq.srq_context = init_attr->qp_context; + my_qp->ib_srq.event_handler = init_attr->event_handler; + } else { + my_qp->ib_qp.qp_num = ib_qp_num; + my_qp->ib_qp.pd = &my_pd->ib_pd; + my_qp->ib_qp.device = my_pd->ib_pd.device; - my_qp->ib_qp.qp_type = init_attr->qp_type; + my_qp->ib_qp.recv_cq = init_attr->recv_cq; + my_qp->ib_qp.send_cq = init_attr->send_cq; - my_qp->qp_type = init_attr->qp_type; - my_qp->ib_qp.srq = init_attr->srq; + my_qp->ib_qp.qp_type = qp_type; + my_qp->ib_qp.srq = init_attr->srq; - my_qp->ib_qp.qp_context = init_attr->qp_context; - my_qp->ib_qp.event_handler = init_attr->event_handler; + my_qp->ib_qp.qp_context = init_attr->qp_context; + my_qp->ib_qp.event_handler = init_attr->event_handler; + } init_attr->cap.max_inline_data = 0; /* not supported yet */ - init_attr->cap.max_recv_sge = parms.act_nr_recv_sges; - init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes; - init_attr->cap.max_send_sge = parms.act_nr_send_sges; - init_attr->cap.max_send_wr = parms.act_nr_send_wqes; + init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges; + init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes; + init_attr->cap.max_send_sge = parms.squeue.act_nr_sges; + init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes; + my_qp->init_attr = *init_attr; /* NOTE: define_apq0() not supported yet */ - if (init_attr->qp_type == IB_QPT_GSI) { + if (qp_type == IB_QPT_GSI) { h_ret = ehca_define_sqp(shca, my_qp, init_attr); if (h_ret != H_SUCCESS) { ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx", h_ret); ret = ehca2ib_return_code(h_ret); - goto create_qp_exit3; + goto create_qp_exit4; } } - if (init_attr->send_cq) { - struct ehca_cq *cq = container_of(init_attr->send_cq, - struct ehca_cq, ib_cq); - ret = ehca_cq_assign_qp(cq, my_qp); + + if (my_qp->send_cq) { + ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp); if (ret) { - ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x", - ret); - goto create_qp_exit3; + ehca_err(pd->device, + "Couldn't assign qp to send_cq ret=%x", ret); + goto create_qp_exit4; } - my_qp->send_cq = cq; } + /* copy queues, galpa data to user space */ if (context && udata) { - struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue; - struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue; struct ehca_create_qp_resp resp; memset(&resp, 0, sizeof(resp)); resp.qp_num = my_qp->real_qp_num; resp.token = my_qp->token; resp.qp_type = my_qp->qp_type; + resp.ext_type = my_qp->ext_type; resp.qkey = my_qp->qkey; resp.real_qp_num = my_qp->real_qp_num; - /* rqueue properties */ - resp.ipz_rqueue.qe_size = ipz_rqueue->qe_size; - resp.ipz_rqueue.act_nr_of_sg = ipz_rqueue->act_nr_of_sg; - resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length; - resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize; - resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state; - /* squeue properties */ - resp.ipz_squeue.qe_size = ipz_squeue->qe_size; - resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg; - resp.ipz_squeue.queue_length = ipz_squeue->queue_length; - resp.ipz_squeue.pagesize = ipz_squeue->pagesize; - resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state; + resp.ipz_rqueue.offset = my_qp->ipz_rqueue.offset; + resp.ipz_squeue.offset = my_qp->ipz_squeue.offset; + if (HAS_SQ(my_qp)) + queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue); + if (HAS_RQ(my_qp)) + queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue); + if (ib_copy_to_udata(udata, &resp, sizeof resp)) { ehca_err(pd->device, "Copy to udata failed"); ret = -EINVAL; - goto create_qp_exit3; + goto create_qp_exit4; } } - return &my_qp->ib_qp; + return my_qp; + +create_qp_exit4: + if (HAS_RQ(my_qp)) + ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); create_qp_exit3: - ipz_queue_dtor(&my_qp->ipz_rqueue); - ipz_queue_dtor(&my_qp->ipz_squeue); + if (HAS_SQ(my_qp)) + ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); create_qp_exit2: hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); create_qp_exit1: - spin_lock_irqsave(&ehca_qp_idr_lock, flags); + write_lock_irqsave(&ehca_qp_idr_lock, flags); idr_remove(&ehca_qp_idr, my_qp->token); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); create_qp_exit0: kmem_cache_free(qp_cache, my_qp); return ERR_PTR(ret); } +struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata) +{ + struct ehca_qp *ret; + + ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0); + return IS_ERR(ret) ? (struct ib_qp *)ret : &ret->ib_qp; +} + +static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, + struct ib_uobject *uobject); + +struct ib_srq *ehca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) +{ + struct ib_qp_init_attr qp_init_attr; + struct ehca_qp *my_qp; + struct ib_srq *ret; + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + struct hcp_modify_qp_control_block *mqpcb; + u64 hret, update_mask; + + /* For common attributes, internal_create_qp() takes its info + * out of qp_init_attr, so copy all common attrs there. + */ + memset(&qp_init_attr, 0, sizeof(qp_init_attr)); + qp_init_attr.event_handler = srq_init_attr->event_handler; + qp_init_attr.qp_context = srq_init_attr->srq_context; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.qp_type = IB_QPT_RC; + qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr; + qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge; + + my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1); + if (IS_ERR(my_qp)) + return (struct ib_srq *)my_qp; + + /* copy back return values */ + srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; + srq_init_attr->attr.max_sge = qp_init_attr.cap.max_recv_sge; + + /* drive SRQ into RTR state */ + mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!mqpcb) { + ehca_err(pd->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); + ret = ERR_PTR(-ENOMEM); + goto create_srq1; + } + + mqpcb->qp_state = EHCA_QPS_INIT; + mqpcb->prim_phys_port = 1; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not modify SRQ to INIT" + "ehca_qp=%p qp_num=%x hret=%lx", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + mqpcb->qp_enable = 1; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not enable SRQ" + "ehca_qp=%p qp_num=%x hret=%lx", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + mqpcb->qp_state = EHCA_QPS_RTR; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not modify SRQ to RTR" + "ehca_qp=%p qp_num=%x hret=%lx", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + return &my_qp->ib_srq; + +create_srq2: + ret = ERR_PTR(ehca2ib_return_code(hret)); + ehca_free_fw_ctrlblock(mqpcb); + +create_srq1: + internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject); + + return ret; +} + /* * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe @@ -707,7 +909,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, my_qp, qp_num, h_ret); return ehca2ib_return_code(h_ret); } - bad_send_wqe_p = (void*)((u64)bad_send_wqe_p & (~(1L<<63))); + bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63))); ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", qp_num, bad_send_wqe_p); /* convert wqe pointer to vadr */ @@ -722,7 +924,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, } /* loop sets wqe's purge bit */ - wqe = (struct ehca_wqe*)ipz_qeit_calc(squeue, q_ofs); + wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); *bad_wqe_cnt = 0; while (wqe->optype != 0xff && wqe->wqef != 0xff) { if (ehca_debug_level) @@ -730,7 +932,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, wqe->nr_of_data_seg = 0; /* suppress data access */ wqe->wqef = WQEF_PURGE; /* WQE to be purged */ q_ofs = ipz_queue_advance_offset(squeue, q_ofs); - wqe = (struct ehca_wqe*)ipz_qeit_calc(squeue, q_ofs); + wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); *bad_wqe_cnt = (*bad_wqe_cnt)+1; } /* @@ -765,7 +967,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, u64 h_ret; int bad_wqe_cnt = 0; int squeue_locked = 0; - unsigned long spl_flags = 0; + unsigned long flags = 0; /* do query_qp to obtain current attr values */ mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); @@ -835,7 +1037,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit1; } - ehca_dbg(ibqp->device,"ehca_qp=%p qp_num=%x current qp_state=%x " + ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x " "new qp_state=%x attribute_mask=%x", my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask); @@ -851,7 +1053,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit1; } - if ((mqpcb->qp_state = ib2ehca_qp_state(qp_new_state))) + mqpcb->qp_state = ib2ehca_qp_state(qp_new_state); + if (mqpcb->qp_state) update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); else { ret = -EINVAL; @@ -886,6 +1089,17 @@ static int internal_modify_qp(struct ib_qp *ibqp, "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x", my_qp, ibqp->qp_num, statetrans); + /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set + * in non-LL UD QPs. + */ + if ((my_qp->qp_type == IB_QPT_UD) && + (my_qp->ext_type != EQPT_LLQP) && + (statetrans == IB_QPST_INIT2RTR) && + (shca->hw_level >= 0x22)) { + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); + mqpcb->send_grh_flag = 1; + } + /* sqe -> rts: set purge bit of bad wqe before actual trans */ if ((my_qp->qp_type == IB_QPT_UD || my_qp->qp_type == IB_QPT_GSI || @@ -895,10 +1109,10 @@ static int internal_modify_qp(struct ib_qp *ibqp, if (!ibqp->uobject) { struct ehca_wqe *wqe; /* lock send queue */ - spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); + spin_lock_irqsave(&my_qp->spinlock_s, flags); squeue_locked = 1; /* mark next free wqe */ - wqe = (struct ehca_wqe*) + wqe = (struct ehca_wqe *) ipz_qeit_get(&my_qp->ipz_squeue); wqe->optype = wqe->wqef = 0xff; ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p", @@ -1133,7 +1347,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx " - "ehca_qp=%p qp_num=%x",h_ret, my_qp, ibqp->qp_num); + "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); goto modify_qp_exit2; } @@ -1181,7 +1395,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, modify_qp_exit2: if (squeue_locked) { /* this means: sqe -> rts */ - spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags); + spin_unlock_irqrestore(&my_qp->spinlock_s, flags); my_qp->sqerr_purgeflag = 1; } @@ -1232,7 +1446,7 @@ int ehca_query_qp(struct ib_qp *qp, } if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { - ehca_err(qp->device,"Invalid attribute mask " + ehca_err(qp->device, "Invalid attribute mask " "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", my_qp, qp->qp_num, qp_attr_mask); return -EINVAL; @@ -1240,7 +1454,7 @@ int ehca_query_qp(struct ib_qp *qp, qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!qpcb) { - ehca_err(qp->device,"Out of memory for qpcb " + ehca_err(qp->device, "Out of memory for qpcb " "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); return -ENOMEM; } @@ -1252,7 +1466,7 @@ int ehca_query_qp(struct ib_qp *qp, if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); - ehca_err(qp->device,"hipz_h_query_qp() failed " + ehca_err(qp->device, "hipz_h_query_qp() failed " "ehca_qp=%p qp_num=%x h_ret=%lx", my_qp, qp->qp_num, h_ret); goto query_qp_exit1; @@ -1263,7 +1477,7 @@ int ehca_query_qp(struct ib_qp *qp, if (qp_attr->cur_qp_state == -EINVAL) { ret = -EINVAL; - ehca_err(qp->device,"Got invalid ehca_qp_state=%x " + ehca_err(qp->device, "Got invalid ehca_qp_state=%x " "ehca_qp=%p qp_num=%x", qpcb->qp_state, my_qp, qp->qp_num); goto query_qp_exit1; @@ -1312,6 +1526,9 @@ int ehca_query_qp(struct ib_qp *qp, qp_attr->alt_port_num = qpcb->alt_phys_port; qp_attr->alt_timeout = qpcb->timeout_al; + qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res; + qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp; + /* primary av */ qp_attr->ah_attr.sl = qpcb->service_level; @@ -1367,53 +1584,170 @@ query_qp_exit1: return ret; } -int ehca_destroy_qp(struct ib_qp *ibqp) +int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) { - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, + struct ehca_qp *my_qp = + container_of(ibsrq, struct ehca_qp, ib_srq); + struct ehca_pd *my_pd = + container_of(ibsrq->pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = + container_of(ibsrq->pd->device, struct ehca_shca, ib_device); + struct hcp_modify_qp_control_block *mqpcb; + u64 update_mask; + u64 h_ret; + int ret = 0; + + u32 cur_pid = current->tgid; + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(ibsrq->pd->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!mqpcb) { + ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); + return -ENOMEM; + } + + update_mask = 0; + if (attr_mask & IB_SRQ_LIMIT) { + attr_mask &= ~IB_SRQ_LIMIT; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1) + | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1); + mqpcb->curr_srq_limit = + EHCA_BMASK_SET(MQPCB_CURR_SRQ_LIMIT, attr->srq_limit); + mqpcb->qp_aff_asyn_ev_log_reg = + EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1); + } + + /* by now, all bits in attr_mask should have been cleared */ + if (attr_mask) { + ehca_err(ibsrq->device, "invalid attribute mask bits set " + "attr_mask=%x", attr_mask); + ret = -EINVAL; + goto modify_srq_exit0; + } + + if (ehca_debug_level) + ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle, + NULL, update_mask, mqpcb, + my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibsrq->device, "hipz_h_modify_qp() failed rc=%lx " + "ehca_qp=%p qp_num=%x", + h_ret, my_qp, my_qp->real_qp_num); + } + +modify_srq_exit0: + ehca_free_fw_ctrlblock(mqpcb); + + return ret; +} + +int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) +{ + struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq); + struct ehca_pd *my_pd = container_of(srq->pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = container_of(srq->device, struct ehca_shca, ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + struct hcp_modify_qp_control_block *qpcb; + u32 cur_pid = current->tgid; + int ret = 0; + u64 h_ret; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(srq->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!qpcb) { + ehca_err(srq->device, "Out of memory for qpcb " + "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle, + NULL, qpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(srq->device, "hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lx", + my_qp, my_qp->real_qp_num, h_ret); + goto query_srq_exit1; + } + + srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; + srq_attr->srq_limit = EHCA_BMASK_GET( + MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit); + + if (ehca_debug_level) + ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); + +query_srq_exit1: + ehca_free_fw_ctrlblock(qpcb); + + return ret; +} + +static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, + struct ib_uobject *uobject) +{ + struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device); struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, ib_pd); u32 cur_pid = current->tgid; - u32 qp_num = ibqp->qp_num; + u32 qp_num = my_qp->real_qp_num; int ret; u64 h_ret; u8 port_num; enum ib_qp_type qp_type; unsigned long flags; - if (ibqp->uobject) { + if (uobject) { if (my_qp->mm_count_galpa || my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { - ehca_err(ibqp->device, "Resources still referenced in " - "user space qp_num=%x", ibqp->qp_num); + ehca_err(dev, "Resources still referenced in " + "user space qp_num=%x", qp_num); return -EINVAL; } if (my_pd->ownpid != cur_pid) { - ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x", + ehca_err(dev, "Invalid caller pid=%x ownpid=%x", cur_pid, my_pd->ownpid); return -EINVAL; } } if (my_qp->send_cq) { - ret = ehca_cq_unassign_qp(my_qp->send_cq, - my_qp->real_qp_num); + ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num); if (ret) { - ehca_err(ibqp->device, "Couldn't unassign qp from " + ehca_err(dev, "Couldn't unassign qp from " "send_cq ret=%x qp_num=%x cq_num=%x", ret, - my_qp->ib_qp.qp_num, my_qp->send_cq->cq_number); + qp_num, my_qp->send_cq->cq_number); return ret; } } - spin_lock_irqsave(&ehca_qp_idr_lock, flags); + write_lock_irqsave(&ehca_qp_idr_lock, flags); idr_remove(&ehca_qp_idr, my_qp->token); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); if (h_ret != H_SUCCESS) { - ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx " + ehca_err(dev, "hipz_h_destroy_qp() failed rc=%lx " "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); return ehca2ib_return_code(h_ret); } @@ -1424,7 +1758,7 @@ int ehca_destroy_qp(struct ib_qp *ibqp) /* no support for IB_QPT_SMI yet */ if (qp_type == IB_QPT_GSI) { struct ib_event event; - ehca_info(ibqp->device, "device %s: port %x is inactive.", + ehca_info(dev, "device %s: port %x is inactive.", shca->ib_device.name, port_num); event.device = &shca->ib_device; event.event = IB_EVENT_PORT_ERR; @@ -1433,18 +1767,34 @@ int ehca_destroy_qp(struct ib_qp *ibqp) ib_dispatch_event(&event); } - ipz_queue_dtor(&my_qp->ipz_rqueue); - ipz_queue_dtor(&my_qp->ipz_squeue); + if (HAS_RQ(my_qp)) + ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); + if (HAS_SQ(my_qp)) + ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); kmem_cache_free(qp_cache, my_qp); return 0; } +int ehca_destroy_qp(struct ib_qp *qp) +{ + return internal_destroy_qp(qp->device, + container_of(qp, struct ehca_qp, ib_qp), + qp->uobject); +} + +int ehca_destroy_srq(struct ib_srq *srq) +{ + return internal_destroy_qp(srq->device, + container_of(srq, struct ehca_qp, ib_srq), + srq->uobject); +} + int ehca_init_qp_cache(void) { qp_cache = kmem_cache_create("ehca_cache_qp", sizeof(struct ehca_qp), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!qp_cache) return -ENOMEM; return 0; diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index caec9dee09e1..94eed70fedf5 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -3,8 +3,9 @@ * * post_send/recv, poll_cq, req_notify * - * Authors: Waleri Fomin <fomin@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Waleri Fomin <fomin@de.ibm.com> + * Joachim Fenkes <fenkes@de.ibm.com> * Reinhard Ernst <rernst@de.ibm.com> * * Copyright (c) 2005 IBM Corporation @@ -78,7 +79,8 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, } if (ehca_debug_level) { - ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue); + ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", + ipz_rqueue); ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); } @@ -98,7 +100,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; struct ib_sge *sge = send_wr->sg_list; ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " - "send_flags=%x opcode=%x",idx, send_wr->wr_id, + "send_flags=%x opcode=%x", idx, send_wr->wr_id, send_wr->num_sge, send_wr->send_flags, send_wr->opcode); if (mad_hdr) { @@ -115,7 +117,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) mad_hdr->attr_mod); } for (j = 0; j < send_wr->num_sge; j++) { - u8 *data = (u8 *) abs_to_virt(sge->addr); + u8 *data = (u8 *)abs_to_virt(sge->addr); ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " "lkey=%x", idx, j, data, sge->length, sge->lkey); @@ -362,10 +364,10 @@ int ehca_post_send(struct ib_qp *qp, struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; - unsigned long spl_flags; + unsigned long flags; /* LOCK the QUEUE */ - spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); + spin_lock_irqsave(&my_qp->spinlock_s, flags); /* loop processes list of send reqs */ for (cur_send_wr = send_wr; cur_send_wr != NULL; @@ -406,26 +408,31 @@ int ehca_post_send(struct ib_qp *qp, } /* eof for cur_send_wr */ post_send_exit0: - /* UNLOCK the QUEUE */ - spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags); iosync(); /* serialize GAL register access */ hipz_update_sqa(my_qp, wqe_cnt); + spin_unlock_irqrestore(&my_qp->spinlock_s, flags); return ret; } -int ehca_post_recv(struct ib_qp *qp, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) +static int internal_post_recv(struct ehca_qp *my_qp, + struct ib_device *dev, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) { - struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); struct ib_recv_wr *cur_recv_wr; struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; - unsigned long spl_flags; + unsigned long flags; + + if (unlikely(!HAS_RQ(my_qp))) { + ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", + my_qp, my_qp->real_qp_num, my_qp->ext_type); + return -ENODEV; + } /* LOCK the QUEUE */ - spin_lock_irqsave(&my_qp->spinlock_r, spl_flags); + spin_lock_irqsave(&my_qp->spinlock_r, flags); /* loop processes list of send reqs */ for (cur_recv_wr = recv_wr; cur_recv_wr != NULL; @@ -439,8 +446,8 @@ int ehca_post_recv(struct ib_qp *qp, *bad_recv_wr = cur_recv_wr; if (wqe_cnt == 0) { ret = -ENOMEM; - ehca_err(qp->device, "Too many posted WQEs " - "qp_num=%x", qp->qp_num); + ehca_err(dev, "Too many posted WQEs " + "qp_num=%x", my_qp->real_qp_num); } goto post_recv_exit0; } @@ -455,23 +462,39 @@ int ehca_post_recv(struct ib_qp *qp, *bad_recv_wr = cur_recv_wr; if (wqe_cnt == 0) { ret = -EINVAL; - ehca_err(qp->device, "Could not write WQE " - "qp_num=%x", qp->qp_num); + ehca_err(dev, "Could not write WQE " + "qp_num=%x", my_qp->real_qp_num); } goto post_recv_exit0; } wqe_cnt++; - ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d", - my_qp, qp->qp_num, wqe_cnt); + ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d", + my_qp, my_qp->real_qp_num, wqe_cnt); } /* eof for cur_recv_wr */ post_recv_exit0: - spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags); iosync(); /* serialize GAL register access */ hipz_update_rqa(my_qp, wqe_cnt); + spin_unlock_irqrestore(&my_qp->spinlock_r, flags); return ret; } +int ehca_post_recv(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return internal_post_recv(container_of(qp, struct ehca_qp, ib_qp), + qp->device, recv_wr, bad_recv_wr); +} + +int ehca_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq), + srq->device, recv_wr, bad_recv_wr); +} + /* * ib_wc_opcode table converts ehca wc opcode to ib * Since we use zero to indicate invalid opcode, the actual ib opcode must @@ -494,6 +517,7 @@ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) int ret = 0; struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); struct ehca_cqe *cqe; + struct ehca_qp *my_qp; int cqe_count = 0; poll_cq_one_read_cqe: @@ -511,9 +535,11 @@ poll_cq_one_read_cqe: cqe_count++; if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { - struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number); + struct ehca_qp *qp; int purgeflag; - unsigned long spl_flags; + unsigned long flags; + + qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number); if (!qp) { ehca_err(cq->device, "cq_num=%x qp_num=%x " "could not find qp -> ignore cqe", @@ -523,13 +549,13 @@ poll_cq_one_read_cqe: /* ignore this purged cqe */ goto poll_cq_one_read_cqe; } - spin_lock_irqsave(&qp->spinlock_s, spl_flags); + spin_lock_irqsave(&qp->spinlock_s, flags); purgeflag = qp->sqerr_purgeflag; - spin_unlock_irqrestore(&qp->spinlock_s, spl_flags); + spin_unlock_irqrestore(&qp->spinlock_s, flags); if (purgeflag) { - ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x " - "src_qp=%x", + ehca_dbg(cq->device, + "Got CQE with purged bit qp_num=%x src_qp=%x", cqe->local_qp_number, cqe->remote_qp_number); if (ehca_debug_level) ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", @@ -545,7 +571,7 @@ poll_cq_one_read_cqe: } /* tracing cqe */ - if (ehca_debug_level) { + if (unlikely(ehca_debug_level)) { ehca_dbg(cq->device, "Received COMPLETION ehca_cq=%p cq_num=%x -----", my_cq, my_cq->cq_number); @@ -579,7 +605,11 @@ poll_cq_one_read_cqe: } else wc->status = IB_WC_SUCCESS; - wc->qp = NULL; + read_lock(&ehca_qp_idr_lock); + my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); + wc->qp = &my_qp->ib_qp; + read_unlock(&ehca_qp_idr_lock); + wc->byte_len = cqe->nr_bytes_transferred; wc->pkey_index = cqe->pkey_index; wc->slid = cqe->rlid; @@ -589,7 +619,7 @@ poll_cq_one_read_cqe: wc->imm_data = cpu_to_be32(cqe->immediate_data); wc->sl = cqe->service_level; - if (wc->status != IB_WC_SUCCESS) + if (unlikely(wc->status != IB_WC_SUCCESS)) ehca_dbg(cq->device, "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe " "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx " @@ -610,7 +640,7 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) int nr; struct ib_wc *current_wc = wc; int ret = 0; - unsigned long spl_flags; + unsigned long flags; if (num_entries < 1) { ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " @@ -619,14 +649,14 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) goto poll_cq_exit0; } - spin_lock_irqsave(&my_cq->spinlock, spl_flags); + spin_lock_irqsave(&my_cq->spinlock, flags); for (nr = 0; nr < num_entries; nr++) { ret = ehca_poll_cq_one(cq, current_wc); if (ret) break; current_wc++; } /* eof for nr */ - spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); + spin_unlock_irqrestore(&my_cq->spinlock, flags); if (ret == -EAGAIN || !ret) ret = nr; @@ -637,7 +667,6 @@ poll_cq_exit0: int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) { struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - unsigned long spl_flags; int ret = 0; switch (notify_flags & IB_CQ_SOLICITED_MASK) { @@ -652,6 +681,7 @@ int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) } if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + unsigned long spl_flags; spin_lock_irqsave(&my_cq->spinlock, spl_flags); ret = ipz_qeit_is_valid(&my_cq->ipz_queue); spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index 973c4b591545..57c77a715f46 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -59,6 +59,7 @@ #include <linux/cpu.h> #include <linux/device.h> +#include <asm/atomic.h> #include <asm/abs_addr.h> #include <asm/ibmebus.h> #include <asm/io.h> @@ -92,14 +93,14 @@ extern int ehca_debug_level; #define ehca_gen_dbg(format, arg...) \ do { \ if (unlikely(ehca_debug_level)) \ - printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n",\ + printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \ get_paca()->paca_index, __FUNCTION__, ## arg); \ } while (0) #define ehca_gen_warn(format, arg...) \ do { \ if (unlikely(ehca_debug_level)) \ - printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n",\ + printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \ get_paca()->paca_index, __FUNCTION__, ## arg); \ } while (0) @@ -113,12 +114,12 @@ extern int ehca_debug_level; * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex> */ #define ehca_dmp(adr, len, format, args...) \ - do { \ - unsigned int x; \ + do { \ + unsigned int x; \ unsigned int l = (unsigned int)(len); \ - unsigned char *deb = (unsigned char*)(adr); \ + unsigned char *deb = (unsigned char *)(adr); \ for (x = 0; x < l; x += 16) { \ - printk("EHCA_DMP:%s " format \ + printk(KERN_INFO "EHCA_DMP:%s " format \ " adr=%p ofs=%04x %016lx %016lx\n", \ __FUNCTION__, ##args, deb, x, \ *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ @@ -127,16 +128,16 @@ extern int ehca_debug_level; } while (0) /* define a bitmask, little endian version */ -#define EHCA_BMASK(pos,length) (((pos)<<16)+(length)) +#define EHCA_BMASK(pos, length) (((pos) << 16) + (length)) /* define a bitmask, the ibm way... */ -#define EHCA_BMASK_IBM(from,to) (((63-to)<<16)+((to)-(from)+1)) +#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1)) /* internal function, don't use */ -#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff) +#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff) /* internal function, don't use */ -#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff)) +#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff)) /** * EHCA_BMASK_SET - return value shifted and masked by mask @@ -144,30 +145,16 @@ extern int ehca_debug_level; * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask * in variable */ -#define EHCA_BMASK_SET(mask,value) \ - ((EHCA_BMASK_MASK(mask) & ((u64)(value)))<<EHCA_BMASK_SHIFTPOS(mask)) +#define EHCA_BMASK_SET(mask, value) \ + ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask)) /** * EHCA_BMASK_GET - extract a parameter from value by mask */ -#define EHCA_BMASK_GET(mask,value) \ - (EHCA_BMASK_MASK(mask)& (((u64)(value))>>EHCA_BMASK_SHIFTPOS(mask))) - +#define EHCA_BMASK_GET(mask, value) \ + (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask))) /* Converts ehca to ib return code */ -static inline int ehca2ib_return_code(u64 ehca_rc) -{ - switch (ehca_rc) { - case H_SUCCESS: - return 0; - case H_BUSY: - return -EBUSY; - case H_NO_MEM: - return -ENOMEM; - default: - return -EINVAL; - } -} - +int ehca2ib_return_code(u64 ehca_rc); #endif /* EHCA_TOOLS_H */ diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c index 73db920b6945..4bc687fdf531 100644 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -70,7 +70,7 @@ int ehca_dealloc_ucontext(struct ib_ucontext *context) static void ehca_mm_open(struct vm_area_struct *vma) { - u32 *count = (u32*)vma->vm_private_data; + u32 *count = (u32 *)vma->vm_private_data; if (!count) { ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", vma->vm_start, vma->vm_end); @@ -86,7 +86,7 @@ static void ehca_mm_open(struct vm_area_struct *vma) static void ehca_mm_close(struct vm_area_struct *vma) { - u32 *count = (u32*)vma->vm_private_data; + u32 *count = (u32 *)vma->vm_private_data; if (!count) { ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", vma->vm_start, vma->vm_end); @@ -149,7 +149,7 @@ static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, ehca_gen_err("vm_insert_page() failed rc=%x", ret); return ret; } - start += PAGE_SIZE; + start += PAGE_SIZE; } vma->vm_private_data = mm_count; (*mm_count)++; @@ -215,7 +215,8 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, case 2: /* qp rqueue_addr */ ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue", qp->ib_qp.qp_num); - ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, &qp->mm_count_rqueue); + ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, + &qp->mm_count_rqueue); if (unlikely(ret)) { ehca_err(qp->ib_qp.device, "ehca_mmap_queue(rq) failed rc=%x qp_num=%x", @@ -227,7 +228,8 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, case 3: /* qp squeue_addr */ ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue", qp->ib_qp.qp_num); - ret = ehca_mmap_queue(vma, &qp->ipz_squeue, &qp->mm_count_squeue); + ret = ehca_mmap_queue(vma, &qp->ipz_squeue, + &qp->mm_count_squeue); if (unlikely(ret)) { ehca_err(qp->ib_qp.device, "ehca_mmap_queue(sq) failed rc=%x qp_num=%x", @@ -253,16 +255,16 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ u32 cur_pid = current->tgid; u32 ret; - unsigned long flags; struct ehca_cq *cq; struct ehca_qp *qp; struct ehca_pd *pd; + struct ib_uobject *uobject; switch (q_type) { case 1: /* CQ */ - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + read_lock(&ehca_cq_idr_lock); cq = idr_find(&ehca_cq_idr, idr_handle); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + read_unlock(&ehca_cq_idr_lock); /* make sure this mmap really belongs to the authorized user */ if (!cq) @@ -288,9 +290,9 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) break; case 2: /* QP */ - spin_lock_irqsave(&ehca_qp_idr_lock, flags); + read_lock(&ehca_qp_idr_lock); qp = idr_find(&ehca_qp_idr, idr_handle); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + read_unlock(&ehca_qp_idr_lock); /* make sure this mmap really belongs to the authorized user */ if (!qp) @@ -304,7 +306,8 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return -ENOMEM; } - if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context) + uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject; + if (!uobject || uobject->context != context) return -EINVAL; ret = ehca_mmap_qp(vma, qp, rsrc_type); diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 5766ae3a2029..fdbfebea7d11 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -5,6 +5,7 @@ * * Authors: Christoph Raisch <raisch@de.ibm.com> * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Joachim Fenkes <fenkes@de.ibm.com> * Gerd Bayer <gerd.bayer@de.ibm.com> * Waleri Fomin <fomin@de.ibm.com> * @@ -51,10 +52,13 @@ #define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11) #define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12) #define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15) +#define H_ALL_RES_QP_STORAGE EHCA_BMASK_IBM(16, 17) #define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18) #define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21) #define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23) #define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31) +#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35) +#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39) #define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63) #define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15) @@ -62,6 +66,12 @@ #define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39) #define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47) +#define H_ALL_RES_QP_UD_AV_LKEY EHCA_BMASK_IBM(32, 63) +#define H_ALL_RES_QP_SRQ_QP_TOKEN EHCA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_SRQ_QP_HANDLE EHCA_BMASK_IBM(0, 64) +#define H_ALL_RES_QP_SRQ_LIMIT EHCA_BMASK_IBM(48, 63) +#define H_ALL_RES_QP_SRQ_QPN EHCA_BMASK_IBM(40, 63) + #define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) #define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63) #define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15) @@ -74,10 +84,7 @@ #define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) #define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) -/* direct access qp controls */ -#define DAQP_CTRL_ENABLE 0x01 -#define DAQP_CTRL_SEND_COMP 0x20 -#define DAQP_CTRL_RECV_COMP 0x40 +static DEFINE_SPINLOCK(hcall_lock); static u32 get_longbusy_msecs(int longbusy_rc) { @@ -155,7 +162,7 @@ static long ehca_plpar_hcall9(unsigned long opcode, { long ret; int i, sleep_msecs, lock_is_set = 0; - unsigned long flags; + unsigned long flags = 0; ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx", @@ -284,68 +291,73 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, } u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_qp *qp, struct ehca_alloc_qp_parms *parms) { u64 ret; - u64 allocate_controls; - u64 max_r10_reg; + u64 allocate_controls, max_r10_reg, r11, r12; u64 outs[PLPAR_HCALL9_BUFSIZE]; - u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1; - u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1; - int daqp_ctrl = parms->daqp_ctrl; allocate_controls = - EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, - (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0) + EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type) | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0) | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype) | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype) + | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage) + | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE, + parms->squeue.page_size) + | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE, + parms->rqueue.page_size) | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING, - (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0) + !!(parms->ll_comp_flags & LLQP_RECV_COMP)) | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING, - (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0) + !!(parms->ll_comp_flags & LLQP_SEND_COMP)) | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL, parms->ud_av_l_key_ctl) | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1); max_r10_reg = EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR, - max_nr_send_wqes) + parms->squeue.max_wr + 1) | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR, - max_nr_receive_wqes) + parms->rqueue.max_wr + 1) | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE, - parms->max_send_sge) + parms->squeue.max_sge) | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, - parms->max_recv_sge); + parms->rqueue.max_sge); + + r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token); + + if (parms->ext_type == EQPT_SRQ) + r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit); + else + r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn); ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, adapter_handle.handle, /* r4 */ allocate_controls, /* r5 */ - qp->send_cq->ipz_cq_handle.handle, - qp->recv_cq->ipz_cq_handle.handle, - parms->ipz_eq_handle.handle, - ((u64)qp->token << 32) | parms->pd.value, - max_r10_reg, /* r10 */ - parms->ud_av_l_key_ctl, /* r11 */ - 0); - qp->ipz_qp_handle.handle = outs[0]; - qp->real_qp_num = (u32)outs[1]; - parms->act_nr_send_wqes = + parms->send_cq_handle.handle, + parms->recv_cq_handle.handle, + parms->eq_handle.handle, + ((u64)parms->token << 32) | parms->pd.value, + max_r10_reg, r11, r12); + + parms->qp_handle.handle = outs[0]; + parms->real_qp_num = (u32)outs[1]; + parms->squeue.act_nr_wqes = (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]); - parms->act_nr_recv_wqes = + parms->rqueue.act_nr_wqes = (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]); - parms->act_nr_send_sges = + parms->squeue.act_nr_sges = (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]); - parms->act_nr_recv_sges = + parms->rqueue.act_nr_sges = (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]); - parms->nr_sq_pages = + parms->squeue.queue_size = (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]); - parms->nr_rq_pages = + parms->rqueue.queue_size = (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); if (ret == H_SUCCESS) - hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]); + hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]); if (ret == H_NOT_ENOUGH_RESOURCES) ehca_gen_err("Not enough resources. ret=%lx", ret); @@ -423,7 +435,8 @@ u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, { return ehca_plpar_hcall_norets(H_REGISTER_RPAGES, adapter_handle.handle, /* r4 */ - queue_type | pagesize << 8, /* r5 */ + (u64)queue_type | ((u64)pagesize) << 8, + /* r5 */ resource_handle, /* r6 */ logical_address_of_page, /* r7 */ count, /* r8 */ @@ -492,13 +505,13 @@ u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, const u64 count, const struct h_galpa galpa) { - if (count != 1) { + if (count > 1) { ehca_gen_err("Page counter=%lx", count); return H_PARAMETER; } - return hipz_h_register_rpage(adapter_handle,pagesize,queue_type, - qp_handle.handle,logical_address_of_page, + return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, + qp_handle.handle, logical_address_of_page, count); } @@ -518,9 +531,9 @@ u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, qp_handle.handle, /* r6 */ 0, 0, 0, 0, 0, 0); if (log_addr_next_sq_wqe2processed) - *log_addr_next_sq_wqe2processed = (void*)outs[0]; + *log_addr_next_sq_wqe2processed = (void *)outs[0]; if (log_addr_next_rq_wqe2processed) - *log_addr_next_rq_wqe2processed = (void*)outs[1]; + *log_addr_next_rq_wqe2processed = (void *)outs[1]; return ret; } @@ -720,6 +733,9 @@ u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, u64 ret; u64 outs[PLPAR_HCALL9_BUFSIZE]; + ehca_gen_dbg("kernel PAGE_SIZE=%x access_ctrl=%016x " + "vaddr=%lx length=%lx", + (u32)PAGE_SIZE, access_ctrl, vaddr, length); ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, adapter_handle.handle, /* r4 */ 5, /* r5 */ @@ -742,8 +758,22 @@ u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, const u64 logical_address_of_page, const u64 count) { + extern int ehca_debug_level; u64 ret; + if (unlikely(ehca_debug_level >= 2)) { + if (count > 1) { + u64 *kpage; + int i; + kpage = (u64 *)abs_to_virt(logical_address_of_page); + for (i = 0; i < count; i++) + ehca_gen_dbg("kpage[%d]=%p", + i, (void *)kpage[i]); + } else + ehca_gen_dbg("kpage=%p", + (void *)logical_address_of_page); + } + if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) { ehca_gen_err("logical_address_of_page not on a 4k boundary " "adapter_handle=%lx mr=%p mr_handle=%lx " diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h index 2869f7dd6196..60ce02b70663 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -78,7 +78,6 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, * initialize resources, create empty QPPTs (2 rings). */ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_qp *qp, struct ehca_alloc_qp_parms *parms); u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c index 0b1a4772c78a..214821095cb1 100644 --- a/drivers/infiniband/hw/ehca/hcp_phyp.c +++ b/drivers/infiniband/hw/ehca/hcp_phyp.c @@ -50,7 +50,7 @@ int hcall_map_page(u64 physaddr, u64 *mapaddr) int hcall_unmap_page(u64 mapaddr) { - iounmap((volatile void __iomem*)mapaddr); + iounmap((volatile void __iomem *) mapaddr); return 0; } diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h index 20898a153446..868735fd3187 100644 --- a/drivers/infiniband/hw/ehca/hipz_fns_core.h +++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h @@ -53,10 +53,10 @@ #define hipz_galpa_load_cq(gal, offset) \ hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) -#define hipz_galpa_store_qp(gal,offset, value) \ +#define hipz_galpa_store_qp(gal, offset, value) \ hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) #define hipz_galpa_load_qp(gal, offset) \ - hipz_galpa_load(gal,QPTEMM_OFFSET(offset)) + hipz_galpa_load(gal, QPTEMM_OFFSET(offset)) static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes) { diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h index fad91368dc5a..d9739e554515 100644 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -161,10 +161,11 @@ struct hipz_qptemm { /* 0x1000 */ }; -#define QPX_SQADDER EHCA_BMASK_IBM(48,63) -#define QPX_RQADDER EHCA_BMASK_IBM(48,63) +#define QPX_SQADDER EHCA_BMASK_IBM(48, 63) +#define QPX_RQADDER EHCA_BMASK_IBM(48, 63) +#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3) -#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x) +#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x) /* MRMWPT Entry Memory Map */ struct hipz_mrmwmm { @@ -186,7 +187,7 @@ struct hipz_mrmwmm { }; -#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm,x) +#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x) struct hipz_qpedmm { /* 0x00 */ @@ -237,7 +238,7 @@ struct hipz_qpedmm { u64 qpedx_rrva3; }; -#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm,x) +#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x) /* CQ Table Entry Memory Map */ struct hipz_cqtemm { @@ -262,12 +263,12 @@ struct hipz_cqtemm { /* 0x1000 */ }; -#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32,63) -#define CQX_FECADDER EHCA_BMASK_IBM(32,63) -#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0,0) -#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0,0) +#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32, 63) +#define CQX_FECADDER EHCA_BMASK_IBM(32, 63) +#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0) +#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0) -#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm,x) +#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x) /* EQ Table Entry Memory Map */ struct hipz_eqtemm { @@ -292,7 +293,7 @@ struct hipz_eqtemm { }; -#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm,x) +#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x) /* access control defines for MR/MW */ #define HIPZ_ACCESSCTRL_L_WRITE 0x00800000 @@ -360,6 +361,24 @@ struct hipz_query_hca { u32 max_neq; } __attribute__ ((packed)); +#define HCA_CAP_AH_PORT_NR_CHECK EHCA_BMASK_IBM( 0, 0) +#define HCA_CAP_ATOMIC EHCA_BMASK_IBM( 1, 1) +#define HCA_CAP_AUTO_PATH_MIG EHCA_BMASK_IBM( 2, 2) +#define HCA_CAP_BAD_P_KEY_CTR EHCA_BMASK_IBM( 3, 3) +#define HCA_CAP_SQD_RTS_PORT_CHANGE EHCA_BMASK_IBM( 4, 4) +#define HCA_CAP_CUR_QP_STATE_MOD EHCA_BMASK_IBM( 5, 5) +#define HCA_CAP_INIT_TYPE EHCA_BMASK_IBM( 6, 6) +#define HCA_CAP_PORT_ACTIVE_EVENT EHCA_BMASK_IBM( 7, 7) +#define HCA_CAP_Q_KEY_VIOL_CTR EHCA_BMASK_IBM( 8, 8) +#define HCA_CAP_WQE_RESIZE EHCA_BMASK_IBM( 9, 9) +#define HCA_CAP_RAW_PACKET_MCAST EHCA_BMASK_IBM(10, 10) +#define HCA_CAP_SHUTDOWN_PORT EHCA_BMASK_IBM(11, 11) +#define HCA_CAP_RC_LL_QP EHCA_BMASK_IBM(12, 12) +#define HCA_CAP_SRQ EHCA_BMASK_IBM(13, 13) +#define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16) +#define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17) +#define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18) + /* query port response block */ struct hipz_query_port { u32 state; diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index bf7a40088f61..a090c679c397 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -40,6 +40,11 @@ #include "ehca_tools.h" #include "ipz_pt_fn.h" +#include "ehca_classes.h" + +#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT) + +struct kmem_cache *small_qp_cache; void *ipz_qpageit_get_inc(struct ipz_queue *queue) { @@ -49,7 +54,7 @@ void *ipz_qpageit_get_inc(struct ipz_queue *queue) queue->current_q_offset -= queue->pagesize; ret = NULL; } - if (((u64)ret) % EHCA_PAGESIZE) { + if (((u64)ret) % queue->pagesize) { ehca_gen_err("ERROR!! not at PAGE-Boundary"); return NULL; } @@ -83,80 +88,195 @@ int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset) return -EINVAL; } -int ipz_queue_ctor(struct ipz_queue *queue, - const u32 nr_of_pages, - const u32 pagesize, const u32 qe_size, const u32 nr_of_sg) +#if PAGE_SHIFT < EHCA_PAGESHIFT +#error Kernel pages must be at least as large than eHCA pages (4K) ! +#endif + +/* + * allocate pages for queue: + * outer loop allocates whole kernel pages (page aligned) and + * inner loop divides a kernel page into smaller hca queue pages + */ +static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages) { - int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT; - int f; + int k, f = 0; + u8 *kpage; - if (pagesize > PAGE_SIZE) { - ehca_gen_err("FATAL ERROR: pagesize=%x is greater " - "than kernel page size", pagesize); - return 0; - } - if (!pages_per_kpage) { - ehca_gen_err("FATAL ERROR: invalid kernel page size. " - "pages_per_kpage=%x", pages_per_kpage); - return 0; - } - queue->queue_length = nr_of_pages * pagesize; - queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); - if (!queue->queue_pages) { - ehca_gen_err("ERROR!! didn't get the memory"); - return 0; - } - memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *)); - /* - * allocate pages for queue: - * outer loop allocates whole kernel pages (page aligned) and - * inner loop divides a kernel page into smaller hca queue pages - */ - f = 0; while (f < nr_of_pages) { - u8 *kpage = (u8*)get_zeroed_page(GFP_KERNEL); - int k; + kpage = (u8 *)get_zeroed_page(GFP_KERNEL); if (!kpage) - goto ipz_queue_ctor_exit0; /*NOMEM*/ - for (k = 0; k < pages_per_kpage && f < nr_of_pages; k++) { - (queue->queue_pages)[f] = (struct ipz_page *)kpage; + goto out; + + for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) { + queue->queue_pages[f] = (struct ipz_page *)kpage; kpage += EHCA_PAGESIZE; f++; } } + return 1; - queue->current_q_offset = 0; +out: + for (f = 0; f < nr_of_pages && queue->queue_pages[f]; + f += PAGES_PER_KPAGE) + free_page((unsigned long)(queue->queue_pages)[f]); + return 0; +} + +static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) +{ + int order = ilog2(queue->pagesize) - 9; + struct ipz_small_queue_page *page; + unsigned long bit; + + mutex_lock(&pd->lock); + + if (!list_empty(&pd->free[order])) + page = list_entry(pd->free[order].next, + struct ipz_small_queue_page, list); + else { + page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL); + if (!page) + goto out; + + page->page = get_zeroed_page(GFP_KERNEL); + if (!page->page) { + kmem_cache_free(small_qp_cache, page); + goto out; + } + + list_add(&page->list, &pd->free[order]); + } + + bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order); + __set_bit(bit, page->bitmap); + page->fill++; + + if (page->fill == IPZ_SPAGE_PER_KPAGE >> order) + list_move(&page->list, &pd->full[order]); + + mutex_unlock(&pd->lock); + + queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9))); + queue->small_page = page; + return 1; + +out: + ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); + return 0; +} + +static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) +{ + int order = ilog2(queue->pagesize) - 9; + struct ipz_small_queue_page *page = queue->small_page; + unsigned long bit; + int free_page = 0; + + bit = ((unsigned long)queue->queue_pages[0] & PAGE_MASK) + >> (order + 9); + + mutex_lock(&pd->lock); + + __clear_bit(bit, page->bitmap); + page->fill--; + + if (page->fill == 0) { + list_del(&page->list); + free_page = 1; + } + + if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1) + /* the page was full until we freed the chunk */ + list_move_tail(&page->list, &pd->free[order]); + + mutex_unlock(&pd->lock); + + if (free_page) { + free_page(page->page); + kmem_cache_free(small_qp_cache, page); + } +} + +int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, + const u32 nr_of_pages, const u32 pagesize, + const u32 qe_size, const u32 nr_of_sg, + int is_small) +{ + if (pagesize > PAGE_SIZE) { + ehca_gen_err("FATAL ERROR: pagesize=%x " + "is greater than kernel page size", pagesize); + return 0; + } + + /* init queue fields */ + queue->queue_length = nr_of_pages * pagesize; + queue->pagesize = pagesize; queue->qe_size = qe_size; queue->act_nr_of_sg = nr_of_sg; - queue->pagesize = pagesize; + queue->current_q_offset = 0; queue->toggle_state = 1; - return 1; + queue->small_page = NULL; - ipz_queue_ctor_exit0: - ehca_gen_err("Couldn't get alloc pages queue=%p f=%x nr_of_pages=%x", - queue, f, nr_of_pages); - for (f = 0; f < nr_of_pages; f += pages_per_kpage) { - if (!(queue->queue_pages)[f]) - break; - free_page((unsigned long)(queue->queue_pages)[f]); + /* allocate queue page pointers */ + queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); + if (!queue->queue_pages) { + ehca_gen_err("Couldn't allocate queue page list"); + return 0; } + memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *)); + + /* allocate actual queue pages */ + if (is_small) { + if (!alloc_small_queue_page(queue, pd)) + goto ipz_queue_ctor_exit0; + } else + if (!alloc_queue_pages(queue, nr_of_pages)) + goto ipz_queue_ctor_exit0; + + return 1; + +ipz_queue_ctor_exit0: + ehca_gen_err("Couldn't alloc pages queue=%p " + "nr_of_pages=%x", queue, nr_of_pages); + vfree(queue->queue_pages); + return 0; } -int ipz_queue_dtor(struct ipz_queue *queue) +int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) { - int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT; - int g; - int nr_pages; + int i, nr_pages; if (!queue || !queue->queue_pages) { ehca_gen_dbg("queue or queue_pages is NULL"); return 0; } - nr_pages = queue->queue_length / queue->pagesize; - for (g = 0; g < nr_pages; g += pages_per_kpage) - free_page((unsigned long)(queue->queue_pages)[g]); + + if (queue->small_page) + free_small_queue_page(queue, pd); + else { + nr_pages = queue->queue_length / queue->pagesize; + for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE) + free_page((unsigned long)queue->queue_pages[i]); + } + vfree(queue->queue_pages); return 1; } + +int ehca_init_small_qp_cache(void) +{ + small_qp_cache = kmem_cache_create("ehca_cache_small_qp", + sizeof(struct ipz_small_queue_page), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!small_qp_cache) + return -ENOMEM; + + return 0; +} + +void ehca_cleanup_small_qp_cache(void) +{ + kmem_cache_destroy(small_qp_cache); +} diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h index 57f141a36bce..c6937a044e8a 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h @@ -51,11 +51,25 @@ #include "ehca_tools.h" #include "ehca_qes.h" +struct ehca_pd; +struct ipz_small_queue_page; + /* struct generic ehca page */ struct ipz_page { u8 entries[EHCA_PAGESIZE]; }; +#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512) + +struct ipz_small_queue_page { + unsigned long page; + unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG]; + int fill; + void *mapped_addr; + u32 mmap_count; + struct list_head list; +}; + /* struct generic queue in linux kernel virtual memory (kv) */ struct ipz_queue { u64 current_q_offset; /* current queue entry */ @@ -66,7 +80,8 @@ struct ipz_queue { u32 queue_length; /* queue length allocated in bytes */ u32 pagesize; u32 toggle_state; /* toggle flag - per page */ - u32 dummy3; /* 64 bit alignment */ + u32 offset; /* save offset within page for small_qp */ + struct ipz_small_queue_page *small_page; }; /* @@ -105,7 +120,6 @@ void *ipz_qpageit_get_inc(struct ipz_queue *queue); * step in struct ipz_queue, will wrap in ringbuffer * returns address (kv) of Queue Entry BEFORE increment * warning don't use in parallel with ipz_qpageit_get_inc() - * warning unpredictable results may occur if steps>act_nr_of_queue_entries */ static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) { @@ -121,31 +135,24 @@ static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) } /* + * return a bool indicating whether current Queue Entry is valid + */ +static inline int ipz_qeit_is_valid(struct ipz_queue *queue) +{ + struct ehca_cqe *cqe = ipz_qeit_get(queue); + return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1)); +} + +/* * return current Queue Entry, increment Queue Entry iterator by one * step in struct ipz_queue, will wrap in ringbuffer * returns address (kv) of Queue Entry BEFORE increment * returns 0 and does not increment, if wrong valid state * warning don't use in parallel with ipz_qpageit_get_inc() - * warning unpredictable results may occur if steps>act_nr_of_queue_entries */ static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) { - struct ehca_cqe *cqe = ipz_qeit_get(queue); - u32 cqe_flags = cqe->cqe_flags; - - if ((cqe_flags >> 7) != (queue->toggle_state & 1)) - return NULL; - - ipz_qeit_get_inc(queue); - return cqe; -} - -static inline int ipz_qeit_is_valid(struct ipz_queue *queue) -{ - struct ehca_cqe *cqe = ipz_qeit_get(queue); - u32 cqe_flags = cqe->cqe_flags; - - return cqe_flags >> 7 == (queue->toggle_state & 1); + return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL; } /* @@ -196,9 +203,10 @@ struct ipz_qpt { * see ipz_qpt_ctor() * returns true if ok, false if out of memory */ -int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages, - const u32 pagesize, const u32 qe_size, - const u32 nr_of_sg); +int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, + const u32 nr_of_pages, const u32 pagesize, + const u32 qe_size, const u32 nr_of_sg, + int is_small); /* * destructor for a ipz_queue_t @@ -206,7 +214,7 @@ int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages, * see ipz_queue_ctor() * returns true if ok, false if queue was NULL-ptr of free failed */ -int ipz_queue_dtor(struct ipz_queue *queue); +int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue); /* * constructor for a ipz_qpt_t, @@ -248,7 +256,7 @@ void *ipz_qeit_eq_get_inc(struct ipz_queue *queue); static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) { void *ret = ipz_qeit_get(queue); - u32 qe = *(u8 *) ret; + u32 qe = *(u8 *)ret; if ((qe >> 7) != (queue->toggle_state & 1)) return NULL; ipz_qeit_eq_get_inc(queue); /* this is a good one */ @@ -258,7 +266,7 @@ static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue) { void *ret = ipz_qeit_get(queue); - u32 qe = *(u8 *) ret; + u32 qe = *(u8 *)ret; if ((qe >> 7) != (queue->toggle_state & 1)) return NULL; return ret; |