From 115d9f95ea7ab780ef315dc356bebba2e07cb731 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 13 Jul 2022 13:57:03 +0300 Subject: net/mlx5e: Remove WARN_ON when trying to offload an unsupported TLS cipher/version The driver reports whether TX/RX TLS device offloads are supported, but not which ciphers/versions, these should be handled by returning -EOPNOTSUPP when .tls_dev_add() is called. Remove the WARN_ON kernel trace when the driver gets a request to offload a cipher/version that is not supported as it is expected. Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index 814f2a56f633..30a70d139046 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -54,7 +54,7 @@ static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk, struct mlx5_core_dev *mdev = priv->mdev; int err; - if (WARN_ON(!mlx5e_ktls_type_check(mdev, crypto_info))) + if (!mlx5e_ktls_type_check(mdev, crypto_info)) return -EOPNOTSUPP; if (direction == TLS_OFFLOAD_CTX_DIR_TX) -- cgit v1.2.3 From 903f2194f74bbd289f3170114035d472a36a8ab4 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Mon, 18 Jul 2022 09:44:13 +0300 Subject: net/mlx5e: TC, Fix post_act to not match on in_port metadata The cited commit changed CT to use multi table actions post act infrastructure instead of using it own post act infrastructure, this broke decap during VF tunnel offload (Stack devices) with CT due to wrong match on in_port metadata in the post act table. This changed only broke VF tunnel offload because it modify the packet in_port metadata to be VF metadata and it isn't propagate the post act creation. Fixed by modify post act rules to match only on fte_id and not match on in_port metadata which isn't needed. Fixes: a81283263bb0 ("net/mlx5e: Use multi table support for CT and sample actions") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index dea137dd744b..2b64dd557b5d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -128,6 +128,7 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at post_attr->inner_match_level = MLX5_MATCH_NONE; post_attr->outer_match_level = MLX5_MATCH_NONE; post_attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_DECAP; + post_attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; handle->ns_type = post_act->ns_type; /* Splits were handled before post action */ -- cgit v1.2.3 From 562696c3c62c7c23dd896e9447252ce9268cb812 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 26 May 2022 16:48:47 +0300 Subject: net/mlx5e: Fix the value of MLX5E_MAX_RQ_NUM_MTTS MLX5E_MAX_RQ_NUM_MTTS should be the maximum value, so that MLX5_MTT_OCTW(MLX5E_MAX_RQ_NUM_MTTS) fits into u16. The current value of 1 << 17 results in MLX5_MTT_OCTW(1 << 17) = 1 << 16, which doesn't fit into u16. This commit replaces it with the maximum value that still fits u16. Fixes: 73281b78a37a ("net/mlx5e: Derive Striding RQ size from MTU") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b6c15efe92ad..f794ffaf1e04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -109,7 +109,7 @@ struct page_pool; #define MLX5E_REQUIRED_WQE_MTTS (MLX5_ALIGN_MTTS(MLX5_MPWRQ_PAGES_PER_WQE + 1)) #define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS) #define MLX5E_MAX_RQ_NUM_MTTS \ - ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */ + (ALIGN_DOWN(U16_MAX, 4) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */ #define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024)) #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW \ (ilog2(MLX5E_MAX_RQ_NUM_MTTS / MLX5E_REQUIRED_WQE_MTTS)) -- cgit v1.2.3 From 52586d2f56b3e4f528ca7268d65074e92c936681 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 30 May 2022 21:06:03 +0300 Subject: net/mlx5e: xsk: Account for XSK RQ UMRs when calculating ICOSQ size ICOSQ is used to post UMR WQEs for both regular RQ and XSK RQ. However, space in ICOSQ is reserved only for the regular RQ, which may cause ICOSQ overflows when using XSK (the most risk is on activating channels). This commit fixes the issue by reserving space for XSK UMR WQEs as well. As XSK may be enabled without restarting the channel and recreating the ICOSQ, this space is reserved unconditionally. Fixes: db05815b36cb ("net/mlx5e: Add XSK zero-copy support") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 3c1edfa33aa7..e025040350ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -790,8 +790,20 @@ static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev, return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; wqebbs = MLX5E_UMR_WQEBBS * BIT(mlx5e_get_rq_log_wq_sz(rqp->rqc)); + + /* If XDP program is attached, XSK may be turned on at any time without + * restarting the channel. ICOSQ must be big enough to fit UMR WQEs of + * both regular RQ and XSK RQ. + * Although mlx5e_mpwqe_get_log_rq_size accepts mlx5e_xsk_param, it + * doesn't affect its return value, as long as params->xdp_prog != NULL, + * so we can just multiply by 2. + */ + if (params->xdp_prog) + wqebbs *= 2; + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) wqebbs += mlx5e_shampo_icosq_sz(mdev, params, rqp); + return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, order_base_2(wqebbs)); } -- cgit v1.2.3 From 677e78c8d44f326a73a77d71acf3a49ea562c1d9 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 2 Jun 2022 15:14:08 +0300 Subject: net/mlx5e: Fix calculations related to max MPWQE size Before commit 76c31e5f7585 ("net/mlx5e: Use FW limitation for max MPW WQEBBs"), the maximum size of MPWQE in WQEBBs was hardcoded as a driver constant. That commit started using the firmware capability that can further limit the size, however, it unintentionally changed a few things: 1. The calculation of MLX5E_MAX_KLM_PER_WQE used the size in DS, which was replaced by the size in WQEBBs, making the resulting value 4 times smaller. 2. MLX5E_TX_MPW_MAX_WQEBBS used to be aligned to the cache line size (either 64 or 128 bytes, i.e. 1 or 2 WQEBBs), but it's no longer the case if the firmware capability is smaller than the driver maximum. Fix both issues by using the correct units for MLX5E_MAX_KLM_PER_WQE and by aligning mlx5e_get_sw_max_sq_mpw_wqebbs after taking the minimum. Besides fixing the arithmetics in calculation of MLX5E_MAX_KLM_PER_WQE, also use appropriate constants: `size of BSF * num of DS per WQEBB * number of WQEBBs` (the calculation before the blamed commit) doesn't make much sense to calculate the WQE size in bytes, so just use `size of WQEBB * number of WQEBBs`. While at it, replace the types that hold the number of WQEBBs by u8. These values don't exceed 16, and it allows to fill holes in two structs. Fixes: 76c31e5f7585 ("net/mlx5e: Use FW limitation for max MPW WQEBBs") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f794ffaf1e04..29b10ef787b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -174,8 +174,8 @@ struct page_pool; ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT) #define MLX5E_MAX_KLM_PER_WQE(mdev) \ - MLX5E_KLM_ENTRIES_PER_WQE(mlx5e_get_sw_max_sq_mpw_wqebbs(mlx5e_get_max_sq_wqebbs(mdev)) \ - << MLX5_MKEY_BSF_OCTO_SIZE) + MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * \ + mlx5e_get_sw_max_sq_mpw_wqebbs(mlx5e_get_max_sq_wqebbs(mdev))) #define MLX5E_MSG_LEVEL NETIF_MSG_LINK @@ -233,7 +233,7 @@ static inline u16 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev) MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB); } -static inline u16 mlx5e_get_sw_max_sq_mpw_wqebbs(u16 max_sq_wqebbs) +static inline u8 mlx5e_get_sw_max_sq_mpw_wqebbs(u8 max_sq_wqebbs) { /* The return value will be multiplied by MLX5_SEND_WQEBB_NUM_DS. * Since max_sq_wqebbs may be up to MLX5_SEND_WQE_MAX_WQEBBS == 16, @@ -242,11 +242,12 @@ static inline u16 mlx5e_get_sw_max_sq_mpw_wqebbs(u16 max_sq_wqebbs) * than MLX5_SEND_WQE_MAX_WQEBBS to let a full-session WQE be * cache-aligned. */ -#if L1_CACHE_BYTES < 128 - return min_t(u16, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1); -#else - return min_t(u16, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 2); + u8 wqebbs = min_t(u8, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1); + +#if L1_CACHE_BYTES >= 128 + wqebbs = ALIGN_DOWN(wqebbs, 2); #endif + return wqebbs; } struct mlx5e_tx_wqe { @@ -455,7 +456,7 @@ struct mlx5e_txqsq { struct netdev_queue *txq; u32 sqn; u16 stop_room; - u16 max_sq_mpw_wqebbs; + u8 max_sq_mpw_wqebbs; u8 min_inline_mode; struct device *pdev; __be32 mkey_be; @@ -570,7 +571,7 @@ struct mlx5e_xdpsq { struct device *pdev; __be32 mkey_be; u16 stop_room; - u16 max_sq_mpw_wqebbs; + u8 max_sq_mpw_wqebbs; u8 min_inline_mode; unsigned long state; unsigned int hw_mtu; -- cgit v1.2.3 From c0063a43700fa8c98cac2637aa1afcf40bb9e403 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 7 Jul 2022 21:49:18 +0200 Subject: net/mlx5e: Modify slow path rules to go to slow fdb While extending available range of supported chains/prios referenced commit also modified slow path rules to go to FT chain instead of actual slow FDB. However neither of existing users of the MLX5_ATTR_FLAG_SLOW_PATH flag (tunnel encap entries with invalid encap and flows with trap action) need to match on FT chain. After bridge offload was implemented packets of such flows can also be matched by bridge priority tables which is undesirable. Restore slow path flows implementation to redirect packets to slow_fdb. Fixes: 278d51f24330 ("net/mlx5: E-Switch, Increase number of chains and priorities") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 23 ++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 2ce3728576d1..eb79810199d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -230,10 +230,8 @@ esw_setup_ft_dest(struct mlx5_flow_destination *dest, } static void -esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, - struct mlx5_flow_act *flow_act, - struct mlx5_fs_chains *chains, - int i) +esw_setup_accept_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_fs_chains *chains, int i) { if (mlx5_chains_ignore_flow_level_supported(chains)) flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; @@ -241,6 +239,16 @@ esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, dest[i].ft = mlx5_chains_get_tc_end_ft(chains); } +static void +esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, int i) +{ + if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = esw->fdb_table.offloads.slow_fdb; +} + static int esw_setup_chain_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, @@ -475,8 +483,11 @@ esw_setup_dests(struct mlx5_flow_destination *dest, } else if (attr->dest_ft) { esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); (*i)++; - } else if (mlx5e_tc_attr_flags_skip(attr->flags)) { - esw_setup_slow_path_dest(dest, flow_act, chains, *i); + } else if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) { + esw_setup_slow_path_dest(dest, flow_act, esw, *i); + (*i)++; + } else if (attr->flags & MLX5_ATTR_FLAG_ACCEPT) { + esw_setup_accept_dest(dest, flow_act, chains, *i); (*i)++; } else if (attr->dest_chain) { err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, -- cgit v1.2.3 From a6e9085d791f8306084fd5bc44dd3fdd4e1ac27b Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Sun, 24 Jul 2022 11:28:21 +0300 Subject: net/mlx5: Adjust log_max_qp to be 18 at most The cited commit limited log_max_qp to be 17 due to FW capabilities. Recently, it turned out that there are old FW versions that supported more than 17, so the cited commit caused a degradation. Thus, set the maximum log_max_qp back to 18 as it was before the cited commit. Fixes: 7f839965b2d7 ("net/mlx5: Update log_max_qp value to be 17 at most") Signed-off-by: Maher Sanalla Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c9b4e50a593e..95f26624b57c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -524,7 +524,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) /* Check log_max_qp from HCA caps to set in current profile */ if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) { - prof->log_max_qp = min_t(u8, 17, MLX5_CAP_GEN_MAX(dev, log_max_qp)); + prof->log_max_qp = min_t(u8, 18, MLX5_CAP_GEN_MAX(dev, log_max_qp)); } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", prof->log_max_qp, -- cgit v1.2.3 From 62d2664351ef37da34f6f3a3fd8ab34257d6fe30 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Mon, 25 Jul 2022 00:06:12 +0300 Subject: net/mlx5: DR, Fix SMFS steering info dump format Fix several issues in SMFS steering info dump: - Fix outdated macro value for matcher mask in the SMFS debug dump format. The existing value denotes the old format of the matcher mask, as it was used during the early stages of development, and it results in wrong parsing by the steering dump parser - wrong fields are shown in the parsed output. - Add the missing destination table to the dumped action. The missing dest table handle breaks the ability to associate between the "go to table" action and the actual table in the steering info. Fixes: 9222f0b27da2 ("net/mlx5: DR, Add support for dumping steering info") Signed-off-by: Yevgeny Kliteynik Signed-off-by: Muhammad Sammar Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c index d5998ef59be4..7adcf0eec13b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c @@ -21,10 +21,11 @@ enum dr_dump_rec_type { DR_DUMP_REC_TYPE_TABLE_TX = 3102, DR_DUMP_REC_TYPE_MATCHER = 3200, - DR_DUMP_REC_TYPE_MATCHER_MASK = 3201, + DR_DUMP_REC_TYPE_MATCHER_MASK_DEPRECATED = 3201, DR_DUMP_REC_TYPE_MATCHER_RX = 3202, DR_DUMP_REC_TYPE_MATCHER_TX = 3203, DR_DUMP_REC_TYPE_MATCHER_BUILDER = 3204, + DR_DUMP_REC_TYPE_MATCHER_MASK = 3205, DR_DUMP_REC_TYPE_RULE = 3300, DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 = 3301, @@ -114,13 +115,15 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id, break; case DR_ACTION_TYP_FT: if (action->dest_tbl->is_fw_tbl) - seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n", DR_DUMP_REC_TYPE_ACTION_FT, action_id, - rule_id, action->dest_tbl->fw_tbl.id); + rule_id, action->dest_tbl->fw_tbl.id, + -1); else - seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%llx\n", DR_DUMP_REC_TYPE_ACTION_FT, action_id, - rule_id, action->dest_tbl->tbl->table_id); + rule_id, action->dest_tbl->tbl->table_id, + DR_DBG_PTR_TO_ID(action->dest_tbl->tbl)); break; case DR_ACTION_TYP_CTR: -- cgit v1.2.3 From 42b4f7f66a43cdb9216e76e595c8a9af154806da Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 27 Jun 2022 16:05:31 +0300 Subject: net/mlx5: Fix driver use of uninitialized timeout Currently, driver is setting default values to all timeouts during function setup. The offending commit is using a timeout before function setup, meaning: the timeout is 0 (or garbage), since no value have been set. This may result in failure to probe the driver: mlx5_function_setup:1034:(pid 69850): Firmware over 4294967296 MS in pre-initializing state, aborting probe_one:1591:(pid 69850): mlx5_init_one failed with error code -16 Hence, set default values to timeouts during tout_init() Fixes: 37ca95e62ee2 ("net/mlx5: Increase FW pre-init timeout for health recovery") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c | 11 ++++------- drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 -- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c index d758848d34d0..696e45e2bd06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c @@ -32,20 +32,17 @@ static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_type dev->timeouts->to[type] = val; } -void mlx5_tout_set_def_val(struct mlx5_core_dev *dev) +int mlx5_tout_init(struct mlx5_core_dev *dev) { int i; - for (i = 0; i < MAX_TIMEOUT_TYPES; i++) - tout_set(dev, tout_def_sw_val[i], i); -} - -int mlx5_tout_init(struct mlx5_core_dev *dev) -{ dev->timeouts = kmalloc(sizeof(*dev->timeouts), GFP_KERNEL); if (!dev->timeouts) return -ENOMEM; + for (i = 0; i < MAX_TIMEOUT_TYPES; i++) + tout_set(dev, tout_def_sw_val[i], i); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h index 257c03eeab36..bc9e9aeda847 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h @@ -35,7 +35,6 @@ int mlx5_tout_init(struct mlx5_core_dev *dev); void mlx5_tout_cleanup(struct mlx5_core_dev *dev); void mlx5_tout_query_iseg(struct mlx5_core_dev *dev); int mlx5_tout_query_dtor(struct mlx5_core_dev *dev); -void mlx5_tout_set_def_val(struct mlx5_core_dev *dev); u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type); #define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 95f26624b57c..ba2e5232b90b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1023,8 +1023,6 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout) if (mlx5_core_is_pf(dev)) pcie_print_link_status(dev->pdev); - mlx5_tout_set_def_val(dev); - /* wait for firmware to accept initialization segments configurations */ err = wait_fw_init(dev, timeout, -- cgit v1.2.3 From d7c4c9e075f8cc6d88d277bc24e5d99297f03c06 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Jul 2022 22:18:21 -0700 Subject: ax25: fix incorrect dev_tracker usage While investigating a separate rose issue [1], and enabling CONFIG_NET_DEV_REFCNT_TRACKER=y, Bernard reported an orthogonal ax25 issue [2] An ax25_dev can be used by one (or many) struct ax25_cb. We thus need different dev_tracker, one per struct ax25_cb. After this patch is applied, we are able to focus on rose. [1] https://lore.kernel.org/netdev/fb7544a1-f42e-9254-18cc-c9b071f4ca70@free.fr/ [2] [ 205.798723] reference already released. [ 205.798732] allocated in: [ 205.798734] ax25_bind+0x1a2/0x230 [ax25] [ 205.798747] __sys_bind+0xea/0x110 [ 205.798753] __x64_sys_bind+0x18/0x20 [ 205.798758] do_syscall_64+0x5c/0x80 [ 205.798763] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 205.798768] freed in: [ 205.798770] ax25_release+0x115/0x370 [ax25] [ 205.798778] __sock_release+0x42/0xb0 [ 205.798782] sock_close+0x15/0x20 [ 205.798785] __fput+0x9f/0x260 [ 205.798789] ____fput+0xe/0x10 [ 205.798792] task_work_run+0x64/0xa0 [ 205.798798] exit_to_user_mode_prepare+0x18b/0x190 [ 205.798804] syscall_exit_to_user_mode+0x26/0x40 [ 205.798808] do_syscall_64+0x69/0x80 [ 205.798812] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 205.798827] ------------[ cut here ]------------ [ 205.798829] WARNING: CPU: 2 PID: 2605 at lib/ref_tracker.c:136 ref_tracker_free.cold+0x60/0x81 [ 205.798837] Modules linked in: rose netrom mkiss ax25 rfcomm cmac algif_hash algif_skcipher af_alg bnep snd_hda_codec_hdmi nls_iso8859_1 i915 rtw88_8821ce rtw88_8821c x86_pkg_temp_thermal rtw88_pci intel_powerclamp rtw88_core snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio coretemp snd_hda_intel kvm_intel snd_intel_dspcfg mac80211 snd_hda_codec kvm i2c_algo_bit drm_buddy drm_dp_helper btusb drm_kms_helper snd_hwdep btrtl snd_hda_core btbcm joydev crct10dif_pclmul btintel crc32_pclmul ghash_clmulni_intel mei_hdcp btmtk intel_rapl_msr aesni_intel bluetooth input_leds snd_pcm crypto_simd syscopyarea processor_thermal_device_pci_legacy sysfillrect cryptd intel_soc_dts_iosf snd_seq sysimgblt ecdh_generic fb_sys_fops rapl libarc4 processor_thermal_device intel_cstate processor_thermal_rfim cec snd_timer ecc snd_seq_device cfg80211 processor_thermal_mbox mei_me processor_thermal_rapl mei rc_core at24 snd intel_pch_thermal intel_rapl_common ttm soundcore int340x_thermal_zone video [ 205.798948] mac_hid acpi_pad sch_fq_codel ipmi_devintf ipmi_msghandler drm msr parport_pc ppdev lp parport ramoops pstore_blk reed_solomon pstore_zone efi_pstore ip_tables x_tables autofs4 hid_generic usbhid hid i2c_i801 i2c_smbus r8169 xhci_pci ahci libahci realtek lpc_ich xhci_pci_renesas [last unloaded: ax25] [ 205.798992] CPU: 2 PID: 2605 Comm: ax25ipd Not tainted 5.18.11-F6BVP #3 [ 205.798996] Hardware name: To be filled by O.E.M. To be filled by O.E.M./CK3, BIOS 5.011 09/16/2020 [ 205.798999] RIP: 0010:ref_tracker_free.cold+0x60/0x81 [ 205.799005] Code: e8 d2 01 9b ff 83 7b 18 00 74 14 48 c7 c7 2f d7 ff 98 e8 10 6e fc ff 8b 7b 18 e8 b8 01 9b ff 4c 89 ee 4c 89 e7 e8 5d fd 07 00 <0f> 0b b8 ea ff ff ff e9 30 05 9b ff 41 0f b6 f7 48 c7 c7 a0 fa 4e [ 205.799008] RSP: 0018:ffffaf5281073958 EFLAGS: 00010286 [ 205.799011] RAX: 0000000080000000 RBX: ffff9a0bd687ebe0 RCX: 0000000000000000 [ 205.799014] RDX: 0000000000000001 RSI: 0000000000000282 RDI: 00000000ffffffff [ 205.799016] RBP: ffffaf5281073a10 R08: 0000000000000003 R09: fffffffffffd5618 [ 205.799019] R10: 0000000000ffff10 R11: 000000000000000f R12: ffff9a0bc53384d0 [ 205.799022] R13: 0000000000000282 R14: 00000000ae000001 R15: 0000000000000001 [ 205.799024] FS: 0000000000000000(0000) GS:ffff9a0d0f300000(0000) knlGS:0000000000000000 [ 205.799028] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 205.799031] CR2: 00007ff6b8311554 CR3: 000000001ac10004 CR4: 00000000001706e0 [ 205.799033] Call Trace: [ 205.799035] [ 205.799038] ? ax25_dev_device_down+0xd9/0x1b0 [ax25] [ 205.799047] ? ax25_device_event+0x9f/0x270 [ax25] [ 205.799055] ? raw_notifier_call_chain+0x49/0x60 [ 205.799060] ? call_netdevice_notifiers_info+0x52/0xa0 [ 205.799065] ? dev_close_many+0xc8/0x120 [ 205.799070] ? unregister_netdevice_many+0x13d/0x890 [ 205.799073] ? unregister_netdevice_queue+0x90/0xe0 [ 205.799076] ? unregister_netdev+0x1d/0x30 [ 205.799080] ? mkiss_close+0x7c/0xc0 [mkiss] [ 205.799084] ? tty_ldisc_close+0x2e/0x40 [ 205.799089] ? tty_ldisc_hangup+0x137/0x210 [ 205.799092] ? __tty_hangup.part.0+0x208/0x350 [ 205.799098] ? tty_vhangup+0x15/0x20 [ 205.799103] ? pty_close+0x127/0x160 [ 205.799108] ? tty_release+0x139/0x5e0 [ 205.799112] ? __fput+0x9f/0x260 [ 205.799118] ax25_dev_device_down+0xd9/0x1b0 [ax25] [ 205.799126] ax25_device_event+0x9f/0x270 [ax25] [ 205.799135] raw_notifier_call_chain+0x49/0x60 [ 205.799140] call_netdevice_notifiers_info+0x52/0xa0 [ 205.799146] dev_close_many+0xc8/0x120 [ 205.799152] unregister_netdevice_many+0x13d/0x890 [ 205.799157] unregister_netdevice_queue+0x90/0xe0 [ 205.799161] unregister_netdev+0x1d/0x30 [ 205.799165] mkiss_close+0x7c/0xc0 [mkiss] [ 205.799170] tty_ldisc_close+0x2e/0x40 [ 205.799173] tty_ldisc_hangup+0x137/0x210 [ 205.799178] __tty_hangup.part.0+0x208/0x350 [ 205.799184] tty_vhangup+0x15/0x20 [ 205.799188] pty_close+0x127/0x160 [ 205.799193] tty_release+0x139/0x5e0 [ 205.799199] __fput+0x9f/0x260 [ 205.799203] ____fput+0xe/0x10 [ 205.799208] task_work_run+0x64/0xa0 [ 205.799213] do_exit+0x33b/0xab0 [ 205.799217] ? __handle_mm_fault+0xc4f/0x15f0 [ 205.799224] do_group_exit+0x35/0xa0 [ 205.799228] __x64_sys_exit_group+0x18/0x20 [ 205.799232] do_syscall_64+0x5c/0x80 [ 205.799238] ? handle_mm_fault+0xba/0x290 [ 205.799242] ? debug_smp_processor_id+0x17/0x20 [ 205.799246] ? fpregs_assert_state_consistent+0x26/0x50 [ 205.799251] ? exit_to_user_mode_prepare+0x49/0x190 [ 205.799256] ? irqentry_exit_to_user_mode+0x9/0x20 [ 205.799260] ? irqentry_exit+0x33/0x40 [ 205.799263] ? exc_page_fault+0x87/0x170 [ 205.799268] ? asm_exc_page_fault+0x8/0x30 [ 205.799273] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 205.799277] RIP: 0033:0x7ff6b80eaca1 [ 205.799281] Code: Unable to access opcode bytes at RIP 0x7ff6b80eac77. [ 205.799283] RSP: 002b:00007fff6dfd4738 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 [ 205.799287] RAX: ffffffffffffffda RBX: 00007ff6b8215a00 RCX: 00007ff6b80eaca1 [ 205.799290] RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000001 [ 205.799293] RBP: 0000000000000001 R08: ffffffffffffff80 R09: 0000000000000028 [ 205.799295] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ff6b8215a00 [ 205.799298] R13: 0000000000000000 R14: 00007ff6b821aee8 R15: 00007ff6b821af00 [ 205.799304] Fixes: feef318c855a ("ax25: fix UAF bugs of net_device caused by rebinding operation") Reported-by: Bernard F6BVP Signed-off-by: Eric Dumazet Cc: Duoming Zhou Link: https://lore.kernel.org/r/20220728051821.3160118-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/ax25.h | 1 + net/ax25/af_ax25.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/ax25.h b/include/net/ax25.h index a427a05672e2..f8cf3629a419 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -236,6 +236,7 @@ typedef struct ax25_cb { ax25_address source_addr, dest_addr; ax25_digi *digipeat; ax25_dev *ax25_dev; + netdevice_tracker dev_tracker; unsigned char iamdigi; unsigned char state, modulus, pidincl; unsigned short vs, vr, va; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 4c7030ed8d33..5b5363c99ed5 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1065,7 +1065,7 @@ static int ax25_release(struct socket *sock) del_timer_sync(&ax25->t3timer); del_timer_sync(&ax25->idletimer); } - dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + dev_put_track(ax25_dev->dev, &ax25->dev_tracker); ax25_dev_put(ax25_dev); } @@ -1146,7 +1146,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (ax25_dev) { ax25_fillin_cb(ax25, ax25_dev); - dev_hold_track(ax25_dev->dev, &ax25_dev->dev_tracker, GFP_ATOMIC); + dev_hold_track(ax25_dev->dev, &ax25->dev_tracker, GFP_ATOMIC); } done: -- cgit v1.2.3 From 944fd1aeacb627fa617f85f8e5a34f7ae8ea4d8e Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Mon, 25 Jul 2022 19:14:42 +0100 Subject: net: allow unbound socket for packets in VRF when tcp_l3mdev_accept set The commit 3c82a21f4320 ("net: allow binding socket in a VRF when there's an unbound socket") changed the inet socket lookup to avoid packets in a VRF from matching an unbound socket. This is to ensure the necessary isolation between the default and other VRFs for routing and forwarding. VRF-unaware processes running in the default VRF cannot access another VRF and have to be run with 'ip vrf exec '. This is to be expected with tcp_l3mdev_accept disabled, but could be reallowed when this sysctl option is enabled. So instead of directly checking dif and sdif in inet[6]_match, here call inet_sk_bound_dev_eq(). This allows a match on unbound socket for non-zero sdif i.e. for packets in a VRF, if tcp_l3mdev_accept is enabled. Fixes: 3c82a21f4320 ("net: allow binding socket in a VRF when there's an unbound socket") Signed-off-by: Mike Manning Link: https://lore.kernel.org/netdev/a54c149aed38fded2d3b5fdb1a6c89e36a083b74.camel@lasnet.de/ Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 7 +++---- include/net/inet_hashtables.h | 19 +++---------------- include/net/inet_sock.h | 11 +++++++++++ 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index f259e1ae14ba..56f1286583d3 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -110,8 +110,6 @@ static inline bool inet6_match(struct net *net, const struct sock *sk, const __portpair ports, const int dif, const int sdif) { - int bound_dev_if; - if (!net_eq(sock_net(sk), net) || sk->sk_family != AF_INET6 || sk->sk_portpair != ports || @@ -119,8 +117,9 @@ static inline bool inet6_match(struct net *net, const struct sock *sk, !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return false; - bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); - return bound_dev_if == dif || bound_dev_if == sdif; + /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */ + return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, + sdif); } #endif /* IS_ENABLED(CONFIG_IPV6) */ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index fd6b510d114b..e9cf2157ed8a 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -175,17 +175,6 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) hashinfo->ehash_locks = NULL; } -static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, - int dif, int sdif) -{ -#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept), - bound_dev_if, dif, sdif); -#else - return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); -#endif -} - struct inet_bind_bucket * inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, @@ -271,16 +260,14 @@ static inline bool inet_match(struct net *net, const struct sock *sk, const __addrpair cookie, const __portpair ports, int dif, int sdif) { - int bound_dev_if; - if (!net_eq(sock_net(sk), net) || sk->sk_portpair != ports || sk->sk_addrpair != cookie) return false; - /* Paired with WRITE_ONCE() from sock_bindtoindex_locked() */ - bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); - return bound_dev_if == dif || bound_dev_if == sdif; + /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */ + return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, + sdif); } /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 6395f6b9a5d2..bf5654ce711e 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -149,6 +149,17 @@ static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if, return bound_dev_if == dif || bound_dev_if == sdif; } +static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, + int dif, int sdif) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept), + bound_dev_if, dif, sdif); +#else + return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); +#endif +} + struct inet_cork { unsigned int flags; __be32 addr; -- cgit v1.2.3 From 180a6a3ee60a7cb69ed1232388460644f6a21f00 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 28 Jul 2022 14:45:33 +0300 Subject: netdevsim: fib: Fix reference count leak on route deletion failure As part of FIB offload simulation, netdevsim stores IPv4 and IPv6 routes and holds a reference on FIB info structures that in turn hold a reference on the associated nexthop device(s). In the unlikely case where we are unable to allocate memory to process a route deletion request, netdevsim will not release the reference from the associated FIB info structure, thereby preventing the associated nexthop device(s) from ever being removed [1]. Fix this by scheduling a work item that will flush netdevsim's FIB table upon route deletion failure. This will cause netdevsim to release its reference from all the FIB info structures in its table. Reported by Lucas Leong of Trend Micro Zero Day Initiative. Fixes: 0ae3eb7b4611 ("netdevsim: fib: Perform the route programming in a non-atomic context") Signed-off-by: Ido Schimmel Reviewed-by: Amit Cohen Reviewed-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/netdevsim/fib.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index c8f398f5bc5b..57371c697d5c 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -54,6 +54,7 @@ struct nsim_fib_data { struct rhashtable nexthop_ht; struct devlink *devlink; struct work_struct fib_event_work; + struct work_struct fib_flush_work; struct list_head fib_event_queue; spinlock_t fib_event_queue_lock; /* Protects fib event queue list */ struct mutex nh_lock; /* Protects NH HT */ @@ -978,7 +979,7 @@ static int nsim_fib_event_schedule_work(struct nsim_fib_data *data, fib_event = kzalloc(sizeof(*fib_event), GFP_ATOMIC); if (!fib_event) - return NOTIFY_BAD; + goto err_fib_event_alloc; fib_event->data = data; fib_event->event = event; @@ -1006,6 +1007,9 @@ static int nsim_fib_event_schedule_work(struct nsim_fib_data *data, err_fib_prepare_event: kfree(fib_event); +err_fib_event_alloc: + if (event == FIB_EVENT_ENTRY_DEL) + schedule_work(&data->fib_flush_work); return NOTIFY_BAD; } @@ -1483,6 +1487,24 @@ static void nsim_fib_event_work(struct work_struct *work) mutex_unlock(&data->fib_lock); } +static void nsim_fib_flush_work(struct work_struct *work) +{ + struct nsim_fib_data *data = container_of(work, struct nsim_fib_data, + fib_flush_work); + struct nsim_fib_rt *fib_rt, *fib_rt_tmp; + + /* Process pending work. */ + flush_work(&data->fib_event_work); + + mutex_lock(&data->fib_lock); + list_for_each_entry_safe(fib_rt, fib_rt_tmp, &data->fib_rt_list, list) { + rhashtable_remove_fast(&data->fib_rt_ht, &fib_rt->ht_node, + nsim_fib_rt_ht_params); + nsim_fib_rt_free(fib_rt, data); + } + mutex_unlock(&data->fib_lock); +} + static int nsim_fib_debugfs_init(struct nsim_fib_data *data, struct nsim_dev *nsim_dev) { @@ -1541,6 +1563,7 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, goto err_rhashtable_nexthop_destroy; INIT_WORK(&data->fib_event_work, nsim_fib_event_work); + INIT_WORK(&data->fib_flush_work, nsim_fib_flush_work); INIT_LIST_HEAD(&data->fib_event_queue); spin_lock_init(&data->fib_event_queue_lock); @@ -1587,6 +1610,7 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, err_nexthop_nb_unregister: unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); err_rhashtable_fib_destroy: + cancel_work_sync(&data->fib_flush_work); flush_work(&data->fib_event_work); rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free, data); @@ -1616,6 +1640,7 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data) NSIM_RESOURCE_IPV4_FIB); unregister_fib_notifier(devlink_net(devlink), &data->fib_nb); unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); + cancel_work_sync(&data->fib_flush_work); flush_work(&data->fib_event_work); rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free, data); -- cgit v1.2.3 From 974be75f250378a6913c7bb268adbfd9a76a6df9 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 28 Jul 2022 14:45:34 +0300 Subject: netdevsim: fib: Add debugfs knob to simulate route deletion failure The previous patch ("netdevsim: fib: Fix reference count leak on route deletion failure") fixed a reference count leak that happens on route deletion failure. Such failures can only be simulated by injecting slab allocation failures, which cannot be surgically injected. In order to be able to specifically test this scenario, add a debugfs knob that allows user space to fail route deletion requests when enabled. Signed-off-by: Ido Schimmel Reviewed-by: Amit Cohen Reviewed-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/netdevsim/fib.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 57371c697d5c..38a1fde8d886 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -62,6 +62,7 @@ struct nsim_fib_data { bool fail_route_offload; bool fail_res_nexthop_group_replace; bool fail_nexthop_bucket_replace; + bool fail_route_delete; }; struct nsim_fib_rt_key { @@ -915,6 +916,10 @@ static int nsim_fib4_prepare_event(struct fib_notifier_info *info, } break; case FIB_EVENT_ENTRY_DEL: + if (data->fail_route_delete) { + NL_SET_ERR_MSG_MOD(extack, "Failed to process route deletion"); + return -EINVAL; + } nsim_fib_account(&data->ipv4.fib, false); break; } @@ -953,6 +958,11 @@ static int nsim_fib6_prepare_event(struct fib_notifier_info *info, } break; case FIB_EVENT_ENTRY_DEL: + if (data->fail_route_delete) { + err = -EINVAL; + NL_SET_ERR_MSG_MOD(extack, "Failed to process route deletion"); + goto err_fib6_event_fini; + } nsim_fib_account(&data->ipv6.fib, false); break; } @@ -1526,6 +1536,10 @@ nsim_fib_debugfs_init(struct nsim_fib_data *data, struct nsim_dev *nsim_dev) debugfs_create_file("nexthop_bucket_activity", 0200, data->ddir, data, &nsim_nexthop_bucket_activity_fops); + + data->fail_route_delete = false; + debugfs_create_bool("fail_route_delete", 0600, data->ddir, + &data->fail_route_delete); return 0; } -- cgit v1.2.3 From 40823f3ee05f7f55cbd4419062a1a388249e88da Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 28 Jul 2022 14:45:35 +0300 Subject: selftests: netdevsim: Add test cases for route deletion failure Add IPv4 and IPv6 test cases that ensure that we are not leaking a reference on the nexthop device when we are unable to delete its associated route. Without the fix in a previous patch ("netdevsim: fib: Fix reference count leak on route deletion failure") both test cases get stuck, waiting for the reference to be released from the dummy device [1][2]. [1] unregister_netdevice: waiting for dummy1 to become free. Usage count = 5 leaked reference. fib_check_nh+0x275/0x620 fib_create_info+0x237c/0x4d30 fib_table_insert+0x1dd/0x1d20 inet_rtm_newroute+0x11b/0x200 rtnetlink_rcv_msg+0x43b/0xd20 netlink_rcv_skb+0x15e/0x430 netlink_unicast+0x53b/0x800 netlink_sendmsg+0x945/0xe40 ____sys_sendmsg+0x747/0x960 ___sys_sendmsg+0x11d/0x190 __sys_sendmsg+0x118/0x1e0 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd [2] unregister_netdevice: waiting for dummy1 to become free. Usage count = 5 leaked reference. fib6_nh_init+0xc46/0x1ca0 ip6_route_info_create+0x1167/0x19a0 ip6_route_add+0x27/0x150 inet6_rtm_newroute+0x161/0x170 rtnetlink_rcv_msg+0x43b/0xd20 netlink_rcv_skb+0x15e/0x430 netlink_unicast+0x53b/0x800 netlink_sendmsg+0x945/0xe40 ____sys_sendmsg+0x747/0x960 ___sys_sendmsg+0x11d/0x190 __sys_sendmsg+0x118/0x1e0 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Signed-off-by: Ido Schimmel Reviewed-by: Amit Cohen Reviewed-by: David Ahern Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/netdevsim/fib.sh | 45 ++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh index fc794cd30389..6800de816e8b 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh @@ -16,6 +16,7 @@ ALL_TESTS=" ipv4_replay ipv4_flush ipv4_error_path + ipv4_delete_fail ipv6_add ipv6_metric ipv6_append_single @@ -29,11 +30,13 @@ ALL_TESTS=" ipv6_replay_single ipv6_replay_multipath ipv6_error_path + ipv6_delete_fail " NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/ NUM_NETIFS=0 source $lib_dir/lib.sh source $lib_dir/fib_offload_lib.sh @@ -157,6 +160,27 @@ ipv4_error_path() ipv4_error_path_replay } +ipv4_delete_fail() +{ + RET=0 + + echo "y" > $DEBUGFS_DIR/fib/fail_route_delete + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + ip -n testns1 route add 192.0.2.0/24 dev dummy1 + ip -n testns1 route del 192.0.2.0/24 dev dummy1 &> /dev/null + + # We should not be able to delete the netdev if we are leaking a + # reference. + ip -n testns1 link del dev dummy1 + + log_test "IPv4 route delete failure" + + echo "n" > $DEBUGFS_DIR/fib/fail_route_delete +} + ipv6_add() { fib_ipv6_add_test "testns1" @@ -304,6 +328,27 @@ ipv6_error_path() ipv6_error_path_replay } +ipv6_delete_fail() +{ + RET=0 + + echo "y" > $DEBUGFS_DIR/fib/fail_route_delete + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + ip -n testns1 route add 2001:db8:1::/64 dev dummy1 + ip -n testns1 route del 2001:db8:1::/64 dev dummy1 &> /dev/null + + # We should not be able to delete the netdev if we are leaking a + # reference. + ip -n testns1 link del dev dummy1 + + log_test "IPv6 route delete failure" + + echo "n" > $DEBUGFS_DIR/fib/fail_route_delete +} + fib_notify_on_flag_change_set() { local notify=$1; shift -- cgit v1.2.3 From ec60d54cb9a3d43a02c5612a03093c18233e6601 Mon Sep 17 00:00:00 2001 From: Przemyslaw Patynowski Date: Mon, 13 Jun 2022 18:41:23 -0400 Subject: iavf: Fix max_rate limiting Fix max_rate option in TC, check for proper quanta boundaries. Check for minimum value provided and if it fits expected 50Mbps quanta. Without this patch, iavf could send settings for max_rate limiting that would be accepted from by PF even the max_rate option is less than expected 50Mbps quanta. It results in no rate limiting on traffic as rate limiting will be floored to 0. Example: tc qdisc add dev $vf root mqprio num_tc 3 map 0 2 1 queues \ 2@0 2@2 2@4 hw 1 mode channel shaper bw_rlimit \ max_rate 50Mbps 500Mbps 500Mbps Should limit TC0 to circa 50 Mbps tc qdisc add dev $vf root mqprio num_tc 3 map 0 2 1 queues \ 2@0 2@2 2@4 hw 1 mode channel shaper bw_rlimit \ max_rate 0Mbps 100Kbit 500Mbps Should return error Fixes: d5b33d024496 ("i40evf: add ndo_setup_tc callback to i40evf") Signed-off-by: Przemyslaw Patynowski Signed-off-by: Jun Zhang Tested-by: Bharathi Sreenivas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 1 + drivers/net/ethernet/intel/iavf/iavf_main.c | 25 +++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 0ea0361cd86b..c241fbc30f93 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -92,6 +92,7 @@ struct iavf_vsi { #define IAVF_HKEY_ARRAY_SIZE ((IAVF_VFQF_HKEY_MAX_INDEX + 1) * 4) #define IAVF_HLUT_ARRAY_SIZE ((IAVF_VFQF_HLUT_MAX_INDEX + 1) * 4) #define IAVF_MBPS_DIVISOR 125000 /* divisor to convert to Mbps */ +#define IAVF_MBPS_QUANTA 50 #define IAVF_VIRTCHNL_VF_RESOURCE_SIZE (sizeof(struct virtchnl_vf_resource) + \ (IAVF_MAX_VF_VSI * \ diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 2e2c153ce46a..51ae10eb348c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3322,6 +3322,7 @@ static int iavf_validate_ch_config(struct iavf_adapter *adapter, struct tc_mqprio_qopt_offload *mqprio_qopt) { u64 total_max_rate = 0; + u32 tx_rate_rem = 0; int i, num_qps = 0; u64 tx_rate = 0; int ret = 0; @@ -3336,12 +3337,32 @@ static int iavf_validate_ch_config(struct iavf_adapter *adapter, return -EINVAL; if (mqprio_qopt->min_rate[i]) { dev_err(&adapter->pdev->dev, - "Invalid min tx rate (greater than 0) specified\n"); + "Invalid min tx rate (greater than 0) specified for TC%d\n", + i); return -EINVAL; } - /*convert to Mbps */ + + /* convert to Mbps */ tx_rate = div_u64(mqprio_qopt->max_rate[i], IAVF_MBPS_DIVISOR); + + if (mqprio_qopt->max_rate[i] && + tx_rate < IAVF_MBPS_QUANTA) { + dev_err(&adapter->pdev->dev, + "Invalid max tx rate for TC%d, minimum %dMbps\n", + i, IAVF_MBPS_QUANTA); + return -EINVAL; + } + + (void)div_u64_rem(tx_rate, IAVF_MBPS_QUANTA, &tx_rate_rem); + + if (tx_rate_rem != 0) { + dev_err(&adapter->pdev->dev, + "Invalid max tx rate for TC%d, not divisible by %d\n", + i, IAVF_MBPS_QUANTA); + return -EINVAL; + } + total_max_rate += tx_rate; num_qps += mqprio_qopt->qopt.count[i]; } -- cgit v1.2.3 From 93cb804edab1b9a5bb7bb7b6824012dbb20abf22 Mon Sep 17 00:00:00 2001 From: Przemyslaw Patynowski Date: Wed, 15 Jun 2022 15:36:29 +0200 Subject: iavf: Fix 'tc qdisc show' listing too many queues Fix tc qdisc show dev root displaying too many fq_codel qdiscs. tc_modify_qdisc, which is caller of ndo_setup_tc, expects driver to call netif_set_real_num_tx_queues, which prepares qdiscs. Without this patch, fq_codel qdiscs would not be adjusted to number of queues on VF. e.g.: tc qdisc show dev qdisc mq 0: root qdisc fq_codel 0: parent :4 limit 10240p flows 1024 quantum 1514 target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64 qdisc fq_codel 0: parent :3 limit 10240p flows 1024 quantum 1514 target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64 qdisc fq_codel 0: parent :2 limit 10240p flows 1024 quantum 1514 target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64 qdisc fq_codel 0: parent :1 limit 10240p flows 1024 quantum 1514 target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64 tc qdisc add dev root mqprio num_tc 2 map 1 0 0 0 0 0 0 0 queues 1@0 1@1 hw 1 mode channel shaper bw_rlimit max_rate 5000Mbit 150Mbit tc qdisc show dev qdisc mqprio 8003: root tc 2 map 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 queues:(0:0) (1:1) mode:channel shaper:bw_rlimit max_rate:5Gbit 150Mbit qdisc fq_codel 0: parent 8003:4 limit 10240p flows 1024 quantum 1514 target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64 qdisc fq_codel 0: parent 8003:3 limit 10240p flows 1024 quantum 1514 target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64 qdisc fq_codel 0: parent 8003:2 limit 10240p flows 1024 quantum 1514 target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64 qdisc fq_codel 0: parent 8003:1 limit 10240p flows 1024 quantum 1514 target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64 While after fix: tc qdisc add dev root mqprio num_tc 2 map 1 0 0 0 0 0 0 0 queues 1@0 1@1 hw 1 mode channel shaper bw_rlimit max_rate 5000Mbit 150Mbit tc qdisc show dev #should show 2, shows 4 qdisc mqprio 8004: root tc 2 map 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 queues:(0:0) (1:1) mode:channel shaper:bw_rlimit max_rate:5Gbit 150Mbit qdisc fq_codel 0: parent 8004:2 limit 10240p flows 1024 quantum 1514 target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64 qdisc fq_codel 0: parent 8004:1 limit 10240p flows 1024 quantum 1514 target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64 Fixes: d5b33d024496 ("i40evf: add ndo_setup_tc callback to i40evf") Signed-off-by: Przemyslaw Patynowski Co-developed-by: Grzegorz Szczurek Signed-off-by: Grzegorz Szczurek Co-developed-by: Kiran Patil Signed-off-by: Kiran Patil Signed-off-by: Jedrzej Jagielski Tested-by: Bharathi Sreenivas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 5 +++++ drivers/net/ethernet/intel/iavf/iavf_main.c | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index c241fbc30f93..a988c08e906f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -431,6 +431,11 @@ struct iavf_adapter { /* lock to protect access to the cloud filter list */ spinlock_t cloud_filter_list_lock; u16 num_cloud_filters; + /* snapshot of "num_active_queues" before setup_tc for qdisc add + * is invoked. This information is useful during qdisc del flow, + * to restore correct number of queues + */ + int orig_num_active_queues; #define IAVF_MAX_FDIR_FILTERS 128 /* max allowed Flow Director filters */ u16 fdir_active_fltr; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 51ae10eb348c..3dbfaead2ac7 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3429,6 +3429,7 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) netif_tx_disable(netdev); iavf_del_all_cloud_filters(adapter); adapter->aq_required = IAVF_FLAG_AQ_DISABLE_CHANNELS; + total_qps = adapter->orig_num_active_queues; goto exit; } else { return -EINVAL; @@ -3472,7 +3473,21 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) adapter->ch_config.ch_info[i].offset = 0; } } + + /* Take snapshot of original config such as "num_active_queues" + * It is used later when delete ADQ flow is exercised, so that + * once delete ADQ flow completes, VF shall go back to its + * original queue configuration + */ + + adapter->orig_num_active_queues = adapter->num_active_queues; + + /* Store queue info based on TC so that VF gets configured + * with correct number of queues when VF completes ADQ config + * flow + */ adapter->ch_config.total_qps = total_qps; + netif_tx_stop_all_queues(netdev); netif_tx_disable(netdev); adapter->aq_required |= IAVF_FLAG_AQ_ENABLE_CHANNELS; @@ -3489,6 +3504,12 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) } } exit: + if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) + return 0; + + netif_set_real_num_rx_queues(netdev, total_qps); + netif_set_real_num_tx_queues(netdev, total_qps); + return ret; } -- cgit v1.2.3 From 5121db6afb999d0049d37e911b08cd440f6f8f82 Mon Sep 17 00:00:00 2001 From: Li Qiong Date: Wed, 27 Jul 2022 23:03:41 +0800 Subject: net/rds: Use PTR_ERR instead of IS_ERR for rdsdebug() If 'local_odp_mr->r_trans_private' is a error code, it is better to print the error code than to print the value of IS_ERR(). Signed-off-by: Li Qiong Signed-off-by: David S. Miller --- net/rds/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 6f1a50d50d06..fba82d36593a 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -742,7 +742,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, NULL, 0, rs, &local_odp_mr->r_key, NULL, iov->addr, iov->bytes, ODP_VIRTUAL); if (IS_ERR(local_odp_mr->r_trans_private)) { - ret = IS_ERR(local_odp_mr->r_trans_private); + ret = PTR_ERR(local_odp_mr->r_trans_private); rdsdebug("get_mr ret %d %p\"", ret, local_odp_mr->r_trans_private); kfree(local_odp_mr); -- cgit v1.2.3 From 7dc839fe47611e6995f370cae37b9797cf7d2672 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Fri, 29 Jul 2022 18:17:54 +0800 Subject: net: ice: fix error NETIF_F_HW_VLAN_CTAG_FILTER check in ice_vsi_sync_fltr() vsi->current_netdev_flags is used store the current net device flags, not the active netdevice features. So it should use vsi->netdev->featurs, rather than vsi->current_netdev_flags to check NETIF_F_HW_VLAN_CTAG_FILTER. Fixes: 1babaf77f49d ("ice: Advertise 802.1ad VLAN filtering and offloads for PF netdev") Signed-off-by: Jian Shen Signed-off-by: Guangbin Huang Acked-by: Tony Nguyen Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 9f02b60459f1..bc68dc5c6927 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -433,7 +433,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) IFF_PROMISC; goto out_promisc; } - if (vsi->current_netdev_flags & + if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER) vlan_ops->ena_rx_filtering(vsi); } -- cgit v1.2.3 From a86e86db5e6d72c82724a63ca1c5293409a21518 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Fri, 29 Jul 2022 18:17:55 +0800 Subject: net: ionic: fix error check for vlan flags in ionic_set_nic_features() The prototype of input features of ionic_set_nic_features() is netdev_features_t, but the vlan_flags is using the private definition of ionic drivers. It should use the variable ctx.cmd.lif_setattr.features, rather than features to check the vlan flags. So fixes it. Fixes: beead698b173 ("ionic: Add the basic NDO callbacks for netdev support") Signed-off-by: Jian Shen Signed-off-by: Guangbin Huang Acked-by: Shannon Nelson Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index f3568901eb91..1443f788ee37 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1437,7 +1437,7 @@ static int ionic_set_nic_features(struct ionic_lif *lif, if ((old_hw_features ^ lif->hw_features) & IONIC_ETH_HW_RX_HASH) ionic_lif_rss_config(lif, lif->rss_types, NULL, NULL); - if ((vlan_flags & features) && + if ((vlan_flags & le64_to_cpu(ctx.cmd.lif_setattr.features)) && !(vlan_flags & le64_to_cpu(ctx.comp.lif_setattr.features))) dev_info_once(lif->ionic->dev, "NIC is not supporting vlan offload, likely in SmartNIC mode\n"); -- cgit v1.2.3 From a41b17ff9dacd22f5f118ee53d82da0f3e52d5e3 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Fri, 29 Jul 2022 19:00:27 +0800 Subject: dccp: put dccp_qpolicy_full() and dccp_qpolicy_push() in the same lock In the case of sk->dccps_qpolicy == DCCPQ_POLICY_PRIO, dccp_qpolicy_full will drop a skb when qpolicy is full. And the lock in dccp_sendmsg is released before sock_alloc_send_skb and then relocked after sock_alloc_send_skb. The following conditions may lead dccp_qpolicy_push to add skb to an already full sk_write_queue: thread1--->lock thread1--->dccp_qpolicy_full: queue is full. drop a skb thread1--->unlock thread2--->lock thread2--->dccp_qpolicy_full: queue is not full. no need to drop. thread2--->unlock thread1--->lock thread1--->dccp_qpolicy_push: add a skb. queue is full. thread1--->unlock thread2--->lock thread2--->dccp_qpolicy_push: add a skb! thread2--->unlock Fix this by moving dccp_qpolicy_full. Fixes: b1308dc015eb ("[DCCP]: Set TX Queue Length Bounds via Sysctl") Signed-off-by: Hangyu Hua Link: https://lore.kernel.org/r/20220729110027.40569-1-hbh25y@gmail.com Signed-off-by: Jakub Kicinski --- net/dccp/proto.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index eb8e128e43e8..e13641c65f88 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -736,11 +736,6 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) lock_sock(sk); - if (dccp_qpolicy_full(sk)) { - rc = -EAGAIN; - goto out_release; - } - timeo = sock_sndtimeo(sk, noblock); /* @@ -759,6 +754,11 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (skb == NULL) goto out_release; + if (dccp_qpolicy_full(sk)) { + rc = -EAGAIN; + goto out_discard; + } + if (sk->sk_state == DCCP_CLOSED) { rc = -ENOTCONN; goto out_discard; -- cgit v1.2.3 From 53e99496abc1b6e9897ad78cf169a06c77db1b5e Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Fri, 29 Jul 2022 14:54:57 +0530 Subject: octeontx2-pf: Reduce minimum mtu size to 60 PTP messages like SYNC, FOLLOW_UP, DELAY_REQ are of size 58 bytes. Using a minimum packet length as 64 makes NIX to pad 6 bytes of zeroes while transmission. This is causing latest ptp4l application to emit errors since length in PTP header and received packet are not same. Padding upto 3 bytes is fine but more than that makes ptp4l to assume the pad bytes as a TLV. Hence reduce the size to 60 from 64. Signed-off-by: Subbaraya Sundeep Signed-off-by: Naveen Mamindlapalli Signed-off-by: Sunil Kovvuri Goutham Link: https://lore.kernel.org/r/20220729092457.3850-1-naveenm@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h index c88e8a436029..fbe62bbfb789 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h @@ -21,7 +21,7 @@ #define OTX2_HEAD_ROOM OTX2_ALIGN #define OTX2_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN) -#define OTX2_MIN_MTU 64 +#define OTX2_MIN_MTU 60 #define OTX2_MAX_GSO_SEGS 255 #define OTX2_MAX_FRAGS_IN_SQE 9 -- cgit v1.2.3 From f56530dcdb0684406661ac9f1accf48319d07600 Mon Sep 17 00:00:00 2001 From: Maciej Żenczykowski Date: Sat, 30 Jul 2022 16:01:13 -0700 Subject: net: usb: make USB_RTL8153_ECM non user configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This refixes: commit 7da17624e7948d5d9660b910f8079d26d26ce453 nt: usb: USB_RTL8153_ECM should not default to y In general, device drivers should not be enabled by default. which basically broke the commit it claimed to fix, ie: commit 657bc1d10bfc23ac06d5d687ce45826c760744f9 r8153_ecm: avoid to be prior to r8152 driver Avoid r8153_ecm is compiled as built-in, if r8152 driver is compiled as modules. Otherwise, the r8153_ecm would be used, even though the device is supported by r8152 driver. this commit amounted to: drivers/net/usb/Kconfig: +config USB_RTL8153_ECM + tristate "RTL8153 ECM support" + depends on USB_NET_CDCETHER && (USB_RTL8152 || USB_RTL8152=n) + default y + help + This option supports ECM mode for RTL8153 ethernet adapter, when + CONFIG_USB_RTL8152 is not set, or the RTL8153 device is not + supported by r8152 driver. drivers/net/usb/Makefile: -obj-$(CONFIG_USB_NET_CDCETHER) += cdc_ether.o r8153_ecm.o +obj-$(CONFIG_USB_NET_CDCETHER) += cdc_ether.o +obj-$(CONFIG_USB_RTL8153_ECM) += r8153_ecm.o And as can be seen it pulls a piece of the cdc_ether driver out into a separate config option to be able to make this piece modular in case cdc_ether is builtin, while r8152 is modular. While in general, device drivers should indeed not be enabled by default: this isn't a device driver per say, but rather this is support code for the CDCETHER (ECM) driver, and should thus be enabled if it is enabled. See also email thread at: https://www.spinics.net/lists/netdev/msg767649.html In: https://www.spinics.net/lists/netdev/msg768284.html Jakub wrote: And when we say "removed" we can just hide it from what's prompted to the user (whatever such internal options are called)? I believe this way we don't bring back Marek's complaint. Side note: these incorrect defaults will result in Android 13 on 5.15 GKI kernels lacking USB_RTL8153_ECM support while having USB_NET_CDCETHER (luckily we also have USB_RTL8150 and USB_RTL8152, so it's probably only an issue for very new RTL815x hardware with no native 5.15 driver). Fixes: 7da17624e7948d5d ("nt: usb: USB_RTL8153_ECM should not default to y") Cc: Geert Uytterhoeven Cc: Greg Kroah-Hartman Cc: Hayes Wang Cc: Jakub Kicinski Signed-off-by: Maciej Żenczykowski Link: https://lore.kernel.org/r/20220730230113.4138858-1-zenczykowski@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/usb/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index e62fc4f2aee0..76659c1c525a 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -637,8 +637,9 @@ config USB_NET_AQC111 * Aquantia AQtion USB to 5GbE config USB_RTL8153_ECM - tristate "RTL8153 ECM support" + tristate depends on USB_NET_CDCETHER && (USB_RTL8152 || USB_RTL8152=n) + default y help This option supports ECM mode for RTL8153 ethernet adapter, when CONFIG_USB_RTL8152 is not set, or the RTL8153 device is not -- cgit v1.2.3 From c67cc4315a8e605ec875bd3a1210a549e3562ddc Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Sun, 31 Jul 2022 16:22:09 +0900 Subject: net: usb: ax88179_178a: Bind only to vendor-specific interface The Anker PowerExpand USB-C to Gigabit Ethernet adapter uses this chipset, but exposes CDC Ethernet configurations as well as the vendor specific one. This driver tries to bind by PID:VID unconditionally and ends up picking up the CDC configuration, which is supposed to be handled by the class driver. To make things even more confusing, it sees both of the CDC class interfaces and tries to bind twice, resulting in two broken Ethernet devices. Change all the ID matches to specifically match the vendor-specific interface. By default the device comes up in CDC mode and is bound by that driver (which works fine); users may switch it to the vendor interface using sysfs to set bConfigurationValue, at which point the device actually goes through a reconnect cycle and comes back as a vendor specific only device, and then this driver binds and works too. The affected device uses VID/PID 0b95:1790, but we might as well change all of them for good measure, since there is no good reason for this driver to bind to standard CDC Ethernet interfaces. v3: Added VID/PID info to commit message Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20220731072209.45504-1-marcan@marcan.st Signed-off-by: Paolo Abeni --- drivers/net/usb/ax88179_178a.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index ac2d400d1d6c..aeb0294385ed 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1965,55 +1965,55 @@ static const struct driver_info at_umc2000sp_info = { static const struct usb_device_id products[] = { { /* ASIX AX88179 10/100/1000 */ - USB_DEVICE(0x0b95, 0x1790), + USB_DEVICE_AND_INTERFACE_INFO(0x0b95, 0x1790, 0xff, 0xff, 0), .driver_info = (unsigned long)&ax88179_info, }, { /* ASIX AX88178A 10/100/1000 */ - USB_DEVICE(0x0b95, 0x178a), + USB_DEVICE_AND_INTERFACE_INFO(0x0b95, 0x178a, 0xff, 0xff, 0), .driver_info = (unsigned long)&ax88178a_info, }, { /* Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller */ - USB_DEVICE(0x04b4, 0x3610), + USB_DEVICE_AND_INTERFACE_INFO(0x04b4, 0x3610, 0xff, 0xff, 0), .driver_info = (unsigned long)&cypress_GX3_info, }, { /* D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter */ - USB_DEVICE(0x2001, 0x4a00), + USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x4a00, 0xff, 0xff, 0), .driver_info = (unsigned long)&dlink_dub1312_info, }, { /* Sitecom USB 3.0 to Gigabit Adapter */ - USB_DEVICE(0x0df6, 0x0072), + USB_DEVICE_AND_INTERFACE_INFO(0x0df6, 0x0072, 0xff, 0xff, 0), .driver_info = (unsigned long)&sitecom_info, }, { /* Samsung USB Ethernet Adapter */ - USB_DEVICE(0x04e8, 0xa100), + USB_DEVICE_AND_INTERFACE_INFO(0x04e8, 0xa100, 0xff, 0xff, 0), .driver_info = (unsigned long)&samsung_info, }, { /* Lenovo OneLinkDock Gigabit LAN */ - USB_DEVICE(0x17ef, 0x304b), + USB_DEVICE_AND_INTERFACE_INFO(0x17ef, 0x304b, 0xff, 0xff, 0), .driver_info = (unsigned long)&lenovo_info, }, { /* Belkin B2B128 USB 3.0 Hub + Gigabit Ethernet Adapter */ - USB_DEVICE(0x050d, 0x0128), + USB_DEVICE_AND_INTERFACE_INFO(0x050d, 0x0128, 0xff, 0xff, 0), .driver_info = (unsigned long)&belkin_info, }, { /* Toshiba USB 3.0 GBit Ethernet Adapter */ - USB_DEVICE(0x0930, 0x0a13), + USB_DEVICE_AND_INTERFACE_INFO(0x0930, 0x0a13, 0xff, 0xff, 0), .driver_info = (unsigned long)&toshiba_info, }, { /* Magic Control Technology U3-A9003 USB 3.0 Gigabit Ethernet Adapter */ - USB_DEVICE(0x0711, 0x0179), + USB_DEVICE_AND_INTERFACE_INFO(0x0711, 0x0179, 0xff, 0xff, 0), .driver_info = (unsigned long)&mct_info, }, { /* Allied Telesis AT-UMC2000 USB 3.0/USB 3.1 Gen 1 to Gigabit Ethernet Adapter */ - USB_DEVICE(0x07c9, 0x000e), + USB_DEVICE_AND_INTERFACE_INFO(0x07c9, 0x000e, 0xff, 0xff, 0), .driver_info = (unsigned long)&at_umc2000_info, }, { /* Allied Telesis AT-UMC200 USB 3.0/USB 3.1 Gen 1 to Fast Ethernet Adapter */ - USB_DEVICE(0x07c9, 0x000f), + USB_DEVICE_AND_INTERFACE_INFO(0x07c9, 0x000f, 0xff, 0xff, 0), .driver_info = (unsigned long)&at_umc200_info, }, { /* Allied Telesis AT-UMC2000/SP USB 3.0/USB 3.1 Gen 1 to Gigabit Ethernet Adapter */ - USB_DEVICE(0x07c9, 0x0010), + USB_DEVICE_AND_INTERFACE_INFO(0x07c9, 0x0010, 0xff, 0xff, 0), .driver_info = (unsigned long)&at_umc2000sp_info, }, { }, -- cgit v1.2.3 From 8eaa1d110800fac050bab44001732747a1c39894 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Fri, 29 Jul 2022 15:13:56 +0300 Subject: net/mlx5e: xsk: Discard unaligned XSK frames on striding RQ Striding RQ uses MTT page mapping, where each page corresponds to an XSK frame. MTT pages have alignment requirements, and XSK frames don't have any alignment guarantees in the unaligned mode. Frames with improper alignment must be discarded, otherwise the packet data will be written at a wrong address. Fixes: 282c0c798f8e ("net/mlx5e: Allow XSK frames smaller than a page") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Reviewed-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20220729121356.3990867-1-maximmi@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h | 14 ++++++++++++++ include/net/xdp_sock_drv.h | 11 +++++++++++ 2 files changed, 25 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h index a8cfab4a393c..cc18d97d8ee0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -7,6 +7,8 @@ #include "en.h" #include +#define MLX5E_MTT_PTAG_MASK 0xfffffffffffffff8ULL + /* RX data path */ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, @@ -21,6 +23,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { +retry: dma_info->xsk = xsk_buff_alloc(rq->xsk_pool); if (!dma_info->xsk) return -ENOMEM; @@ -32,6 +35,17 @@ static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq, */ dma_info->addr = xsk_buff_xdp_get_frame_dma(dma_info->xsk); + /* MTT page mapping has alignment requirements. If they are not + * satisfied, leak the descriptor so that it won't come again, and try + * to allocate a new one. + */ + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + if (unlikely(dma_info->addr & ~MLX5E_MTT_PTAG_MASK)) { + xsk_buff_discard(dma_info->xsk); + goto retry; + } + } + return 0; } diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 4aa031849668..0774ce97c2f1 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -95,6 +95,13 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) xp_free(xskb); } +static inline void xsk_buff_discard(struct xdp_buff *xdp) +{ + struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); + + xp_release(xskb); +} + static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) { xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM; @@ -238,6 +245,10 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) { } +static inline void xsk_buff_discard(struct xdp_buff *xdp) +{ +} + static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) { } -- cgit v1.2.3