diff options
author | Vlad Buslov <vladbu@nvidia.com> | 2021-05-31 15:28:39 +0200 |
---|---|---|
committer | Saeed Mahameed <saeedm@nvidia.com> | 2021-06-10 02:20:03 +0200 |
commit | fb1a3132ee1ac968316e45d21a48703a6db0b6c3 (patch) | |
tree | b8de9e4be293c5fd20037ac4325e5cef1cd34e9b | |
parent | net/mlx5e: Fix an error code in mlx5e_arfs_create_tables() (diff) | |
download | linux-fb1a3132ee1ac968316e45d21a48703a6db0b6c3.tar.xz linux-fb1a3132ee1ac968316e45d21a48703a6db0b6c3.zip |
net/mlx5e: Fix use-after-free of encap entry in neigh update handler
Function mlx5e_rep_neigh_update() wasn't updated to accommodate rtnl lock
removal from TC filter update path and properly handle concurrent encap
entry insertion/deletion which can lead to following use-after-free:
[23827.464923] ==================================================================
[23827.469446] BUG: KASAN: use-after-free in mlx5e_encap_take+0x72/0x140 [mlx5_core]
[23827.470971] Read of size 4 at addr ffff8881d132228c by task kworker/u20:6/21635
[23827.472251]
[23827.472615] CPU: 9 PID: 21635 Comm: kworker/u20:6 Not tainted 5.13.0-rc3+ #5
[23827.473788] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
[23827.475639] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core]
[23827.476731] Call Trace:
[23827.477260] dump_stack+0xbb/0x107
[23827.477906] print_address_description.constprop.0+0x18/0x140
[23827.478896] ? mlx5e_encap_take+0x72/0x140 [mlx5_core]
[23827.479879] ? mlx5e_encap_take+0x72/0x140 [mlx5_core]
[23827.480905] kasan_report.cold+0x7c/0xd8
[23827.481701] ? mlx5e_encap_take+0x72/0x140 [mlx5_core]
[23827.482744] kasan_check_range+0x145/0x1a0
[23827.493112] mlx5e_encap_take+0x72/0x140 [mlx5_core]
[23827.494054] ? mlx5e_tc_tun_encap_info_equal_generic+0x140/0x140 [mlx5_core]
[23827.495296] mlx5e_rep_neigh_update+0x41e/0x5e0 [mlx5_core]
[23827.496338] ? mlx5e_rep_neigh_entry_release+0xb80/0xb80 [mlx5_core]
[23827.497486] ? read_word_at_a_time+0xe/0x20
[23827.498250] ? strscpy+0xa0/0x2a0
[23827.498889] process_one_work+0x8ac/0x14e0
[23827.499638] ? lockdep_hardirqs_on_prepare+0x400/0x400
[23827.500537] ? pwq_dec_nr_in_flight+0x2c0/0x2c0
[23827.501359] ? rwlock_bug.part.0+0x90/0x90
[23827.502116] worker_thread+0x53b/0x1220
[23827.502831] ? process_one_work+0x14e0/0x14e0
[23827.503627] kthread+0x328/0x3f0
[23827.504254] ? _raw_spin_unlock_irq+0x24/0x40
[23827.505065] ? __kthread_bind_mask+0x90/0x90
[23827.505912] ret_from_fork+0x1f/0x30
[23827.506621]
[23827.506987] Allocated by task 28248:
[23827.507694] kasan_save_stack+0x1b/0x40
[23827.508476] __kasan_kmalloc+0x7c/0x90
[23827.509197] mlx5e_attach_encap+0xde1/0x1d40 [mlx5_core]
[23827.510194] mlx5e_tc_add_fdb_flow+0x397/0xc40 [mlx5_core]
[23827.511218] __mlx5e_add_fdb_flow+0x519/0xb30 [mlx5_core]
[23827.512234] mlx5e_configure_flower+0x191c/0x4870 [mlx5_core]
[23827.513298] tc_setup_cb_add+0x1d5/0x420
[23827.514023] fl_hw_replace_filter+0x382/0x6a0 [cls_flower]
[23827.514975] fl_change+0x2ceb/0x4a51 [cls_flower]
[23827.515821] tc_new_tfilter+0x89a/0x2070
[23827.516548] rtnetlink_rcv_msg+0x644/0x8c0
[23827.517300] netlink_rcv_skb+0x11d/0x340
[23827.518021] netlink_unicast+0x42b/0x700
[23827.518742] netlink_sendmsg+0x743/0xc20
[23827.519467] sock_sendmsg+0xb2/0xe0
[23827.520131] ____sys_sendmsg+0x590/0x770
[23827.520851] ___sys_sendmsg+0xd8/0x160
[23827.521552] __sys_sendmsg+0xb7/0x140
[23827.522238] do_syscall_64+0x3a/0x70
[23827.522907] entry_SYSCALL_64_after_hwframe+0x44/0xae
[23827.523797]
[23827.524163] Freed by task 25948:
[23827.524780] kasan_save_stack+0x1b/0x40
[23827.525488] kasan_set_track+0x1c/0x30
[23827.526187] kasan_set_free_info+0x20/0x30
[23827.526968] __kasan_slab_free+0xed/0x130
[23827.527709] slab_free_freelist_hook+0xcf/0x1d0
[23827.528528] kmem_cache_free_bulk+0x33a/0x6e0
[23827.529317] kfree_rcu_work+0x55f/0xb70
[23827.530024] process_one_work+0x8ac/0x14e0
[23827.530770] worker_thread+0x53b/0x1220
[23827.531480] kthread+0x328/0x3f0
[23827.532114] ret_from_fork+0x1f/0x30
[23827.532785]
[23827.533147] Last potentially related work creation:
[23827.534007] kasan_save_stack+0x1b/0x40
[23827.534710] kasan_record_aux_stack+0xab/0xc0
[23827.535492] kvfree_call_rcu+0x31/0x7b0
[23827.536206] mlx5e_tc_del_fdb_flow+0x577/0xef0 [mlx5_core]
[23827.537305] mlx5e_flow_put+0x49/0x80 [mlx5_core]
[23827.538290] mlx5e_delete_flower+0x6d1/0xe60 [mlx5_core]
[23827.539300] tc_setup_cb_destroy+0x18e/0x2f0
[23827.540144] fl_hw_destroy_filter+0x1d2/0x310 [cls_flower]
[23827.541148] __fl_delete+0x4dc/0x660 [cls_flower]
[23827.541985] fl_delete+0x97/0x160 [cls_flower]
[23827.542782] tc_del_tfilter+0x7ab/0x13d0
[23827.543503] rtnetlink_rcv_msg+0x644/0x8c0
[23827.544257] netlink_rcv_skb+0x11d/0x340
[23827.544981] netlink_unicast+0x42b/0x700
[23827.545700] netlink_sendmsg+0x743/0xc20
[23827.546424] sock_sendmsg+0xb2/0xe0
[23827.547084] ____sys_sendmsg+0x590/0x770
[23827.547850] ___sys_sendmsg+0xd8/0x160
[23827.548606] __sys_sendmsg+0xb7/0x140
[23827.549303] do_syscall_64+0x3a/0x70
[23827.549969] entry_SYSCALL_64_after_hwframe+0x44/0xae
[23827.550853]
[23827.551217] The buggy address belongs to the object at ffff8881d1322200
[23827.551217] which belongs to the cache kmalloc-256 of size 256
[23827.553341] The buggy address is located 140 bytes inside of
[23827.553341] 256-byte region [ffff8881d1322200, ffff8881d1322300)
[23827.555747] The buggy address belongs to the page:
[23827.556847] page:00000000898762aa refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1d1320
[23827.558651] head:00000000898762aa order:2 compound_mapcount:0 compound_pincount:0
[23827.559961] flags: 0x2ffff800010200(slab|head|node=0|zone=2|lastcpupid=0x1ffff)
[23827.561243] raw: 002ffff800010200 dead000000000100 dead000000000122 ffff888100042b40
[23827.562653] raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000
[23827.564112] page dumped because: kasan: bad access detected
[23827.565439]
[23827.565932] Memory state around the buggy address:
[23827.566917] ffff8881d1322180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[23827.568485] ffff8881d1322200: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[23827.569818] >ffff8881d1322280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[23827.571143] ^
[23827.571879] ffff8881d1322300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[23827.573283] ffff8881d1322380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[23827.574654] ==================================================================
Most of the necessary logic is already correctly implemented by
mlx5e_get_next_valid_encap() helper that is used in neigh stats update
handler. Make the handler generic by renaming it to
mlx5e_get_next_matching_encap() and use callback to test whether flow is
matching instead of hardcoded check for 'valid' flag value. Implement
mlx5e_get_next_valid_encap() by calling mlx5e_get_next_matching_encap()
with callback that tests encap MLX5_ENCAP_ENTRY_VALID flag. Implement new
mlx5e_get_next_init_encap() helper by calling
mlx5e_get_next_matching_encap() with callback that tests encap completion
result to be non-error and use it in mlx5e_rep_neigh_update() to safely
iterate over nhe->encap_list.
Remove encap completion logic from mlx5e_rep_update_flows() since the encap
entries passed to this function are already guaranteed to be properly
initialized by similar code in mlx5e_get_next_init_encap().
Fixes: 2a1f1768fa17 ("net/mlx5e: Refactor neigh update for concurrent execution")
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
4 files changed, 40 insertions, 17 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c index be0ee03de721..2e9bee4e5209 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c @@ -129,10 +129,9 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) work); struct mlx5e_neigh_hash_entry *nhe = update_work->nhe; struct neighbour *n = update_work->n; + struct mlx5e_encap_entry *e = NULL; bool neigh_connected, same_dev; - struct mlx5e_encap_entry *e; unsigned char ha[ETH_ALEN]; - struct mlx5e_priv *priv; u8 nud_state, dead; rtnl_lock(); @@ -156,14 +155,12 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) if (!same_dev) goto out; - list_for_each_entry(e, &nhe->encap_list, encap_list) { - if (!mlx5e_encap_take(e)) - continue; + /* mlx5e_get_next_init_encap() releases previous encap before returning + * the next one. + */ + while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL) + mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha); - priv = netdev_priv(e->out_dev); - mlx5e_rep_update_flows(priv, e, neigh_connected, ha); - mlx5e_encap_put(priv, e); - } out: rtnl_unlock(); mlx5e_release_neigh_update_work(update_work); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 311382261840..85eaadc989df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -94,13 +94,9 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv, ASSERT_RTNL(); - /* wait for encap to be fully initialized */ - wait_for_completion(&e->res_ready); - mutex_lock(&esw->offloads.encap_tbl_lock); encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); - if (e->compl_result < 0 || (encap_connected == neigh_connected && - ether_addr_equal(e->h_dest, ha))) + if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha)) goto unlock; mlx5e_take_all_encap_flows(e, &flow_list); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index f1fb11680d20..490131e06efb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -251,9 +251,12 @@ static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r, mlx5e_take_tmp_flow(flow, flow_list, 0); } +typedef bool (match_cb)(struct mlx5e_encap_entry *); + static struct mlx5e_encap_entry * -mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, - struct mlx5e_encap_entry *e) +mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e, + match_cb match) { struct mlx5e_encap_entry *next = NULL; @@ -288,7 +291,7 @@ retry: /* wait for encap to be fully initialized */ wait_for_completion(&next->res_ready); /* continue searching if encap entry is not in valid state after completion */ - if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) { + if (!match(next)) { e = next; goto retry; } @@ -296,6 +299,30 @@ retry: return next; } +static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e) +{ + return e->flags & MLX5_ENCAP_ENTRY_VALID; +} + +static struct mlx5e_encap_entry * +mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e) +{ + return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid); +} + +static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e) +{ + return e->compl_result >= 0; +} + +struct mlx5e_encap_entry * +mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e) +{ + return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized); +} + void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_neigh *m_neigh = &nhe->m_neigh; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 25c091795bcd..17027536efba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -178,6 +178,9 @@ void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *f void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list); struct mlx5e_neigh_hash_entry; +struct mlx5e_encap_entry * +mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e); void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); void mlx5e_tc_reoffload_flows_work(struct work_struct *work); |