diff options
author | Steve Wise <swise@opengridcomputing.com> | 2016-09-16 16:54:52 +0200 |
---|---|---|
committer | Doug Ledford <dledford@redhat.com> | 2016-10-07 22:54:40 +0200 |
commit | 49b53a93a64ab0aaec10851b004297a3ac885433 (patch) | |
tree | 6f252df36ba90d7baccddfbae6bb57d1ea6a4937 /drivers/infiniband/hw/cxgb4/t4.h | |
parent | cxgb4: advertise support for FR_NSMR_TPTE_WR (diff) | |
download | linux-49b53a93a64ab0aaec10851b004297a3ac885433.tar.xz linux-49b53a93a64ab0aaec10851b004297a3ac885433.zip |
iw_cxgb4: add fast-path for small REG_MR operations
When processing a REG_MR work request, if fw supports the
FW_RI_NSMR_TPTE_WR work request, and if the page list for this
registration is <= 2 pages, and the current state of the mr is INVALID,
then use FW_RI_NSMR_TPTE_WR to pass down a fully populated TPTE for FW
to write. This avoids FW having to do an async read of the TPTE blocking
the SQ until the read completes.
To know if the current MR state is INVALID or not, iw_cxgb4 must track the
state of each fastreg MR. The c4iw_mr struct state is updated as REG_MR
and LOCAL_INV WRs are posted and completed, when a reg_mr is destroyed,
and when RECV completions are processed that include a local invalidation.
This optimization increases small IO IOPS for both iSER and NVMF.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/hw/cxgb4/t4.h')
-rw-r--r-- | drivers/infiniband/hw/cxgb4/t4.h | 4 |
1 files changed, 3 insertions, 1 deletions
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 6126bbe36095..b2bfbb1eef1a 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -95,6 +95,7 @@ union t4_wr { struct fw_ri_rdma_read_wr read; struct fw_ri_bind_mw_wr bind; struct fw_ri_fr_nsmr_wr fr; + struct fw_ri_fr_nsmr_tpte_wr fr_tpte; struct fw_ri_inv_lstag_wr inv; struct t4_status_page status; __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; @@ -170,7 +171,7 @@ struct t4_cqe { __be32 msn; } rcqe; struct { - u32 nada1; + u32 stag; u16 nada2; u16 cidx; } scqe; @@ -232,6 +233,7 @@ struct t4_cqe { /* used for SQ completion processing */ #define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx) +#define CQE_WRID_FR_STAG(x) (be32_to_cpu((x)->u.scqe.stag)) /* generic accessor macros */ #define CQE_WRID_HI(x) (be32_to_cpu((x)->u.gen.wrid_hi)) |