From 3170256d7bc1ef81587caf4b83573eb1f5bb4fb6 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Tue, 29 Aug 2023 15:40:22 +0200 Subject: counter: chrdev: fix getting array extensions When trying to watch a component array extension, and the array isn't the first extended element, it fails as the type comparison is always done on the 1st element. Fix it by indexing the 'ext' array. Example on a dummy struct counter_comp: static struct counter_comp dummy[] = { COUNTER_COMP_DIRECTION(..), ..., COUNTER_COMP_ARRAY_CAPTURE(...), }; static struct counter_count dummy_cnt = { ... .ext = dummy, .num_ext = ARRAY_SIZE(dummy), } Currently, counter_get_ext() returns -EINVAL when trying to add a watch event on one of the capture array element in such example. Fixes: d2011be1e22f ("counter: Introduce the COUNTER_COMP_ARRAY component type") Signed-off-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20230829134029.2402868-2-fabrice.gasnier@foss.st.com Signed-off-by: William Breathitt Gray --- drivers/counter/counter-chrdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/counter/counter-chrdev.c b/drivers/counter/counter-chrdev.c index 80acdf62794a..afc94d0062b1 100644 --- a/drivers/counter/counter-chrdev.c +++ b/drivers/counter/counter-chrdev.c @@ -247,8 +247,8 @@ static int counter_get_ext(const struct counter_comp *const ext, if (*id == component_id) return 0; - if (ext->type == COUNTER_COMP_ARRAY) { - element = ext->priv; + if (ext[*ext_idx].type == COUNTER_COMP_ARRAY) { + element = ext[*ext_idx].priv; if (component_id - *id < element->length) return 0; -- cgit v1.2.3 From 6d41d4fe28724db16ca1016df0713a07e0cc7448 Mon Sep 17 00:00:00 2001 From: Dong Chenchen Date: Tue, 15 Aug 2023 22:18:34 +0800 Subject: net: xfrm: skip policies marked as dead while reinserting policies BUG: KASAN: slab-use-after-free in xfrm_policy_inexact_list_reinsert+0xb6/0x430 Read of size 1 at addr ffff8881051f3bf8 by task ip/668 CPU: 2 PID: 668 Comm: ip Not tainted 6.5.0-rc5-00182-g25aa0bebba72-dirty #64 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13 04/01/2014 Call Trace: dump_stack_lvl+0x72/0xa0 print_report+0xd0/0x620 kasan_report+0xb6/0xf0 xfrm_policy_inexact_list_reinsert+0xb6/0x430 xfrm_policy_inexact_insert_node.constprop.0+0x537/0x800 xfrm_policy_inexact_alloc_chain+0x23f/0x320 xfrm_policy_inexact_insert+0x6b/0x590 xfrm_policy_insert+0x3b1/0x480 xfrm_add_policy+0x23c/0x3c0 xfrm_user_rcv_msg+0x2d0/0x510 netlink_rcv_skb+0x10d/0x2d0 xfrm_netlink_rcv+0x49/0x60 netlink_unicast+0x3fe/0x540 netlink_sendmsg+0x528/0x970 sock_sendmsg+0x14a/0x160 ____sys_sendmsg+0x4fc/0x580 ___sys_sendmsg+0xef/0x160 __sys_sendmsg+0xf7/0x1b0 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x73/0xdd The root cause is: cpu 0 cpu1 xfrm_dump_policy xfrm_policy_walk list_move_tail xfrm_add_policy ... ... xfrm_policy_inexact_list_reinsert list_for_each_entry_reverse if (!policy->bydst_reinsert) //read non-existent policy xfrm_dump_policy_done xfrm_policy_walk_done list_del(&walk->walk.all); If dump_one_policy() returns err (triggered by netlink socket), xfrm_policy_walk() will move walk initialized by socket to list net->xfrm.policy_all. so this socket becomes visible in the global policy list. The head *walk can be traversed when users add policies with different prefixlen and trigger xfrm_policy node merge. The issue can also be triggered by policy list traversal while rehashing and flushing policies. It can be fixed by skip such "policies" with walk.dead set to 1. Fixes: 9cf545ebd591 ("xfrm: policy: store inexact policies in a tree ordered by destination address") Fixes: 12a169e7d8f4 ("ipsec: Put dumpers on the dump list") Signed-off-by: Dong Chenchen Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d6b405782b63..113fb7e9cdaf 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -851,7 +851,7 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net, struct hlist_node *newpos = NULL; bool matches_s, matches_d; - if (!policy->bydst_reinsert) + if (policy->walk.dead || !policy->bydst_reinsert) continue; WARN_ON_ONCE(policy->family != family); @@ -1256,8 +1256,11 @@ static void xfrm_hash_rebuild(struct work_struct *work) struct xfrm_pol_inexact_bin *bin; u8 dbits, sbits; + if (policy->walk.dead) + continue; + dir = xfrm_policy_id2dir(policy->index); - if (policy->walk.dead || dir >= XFRM_POLICY_MAX) + if (dir >= XFRM_POLICY_MAX) continue; if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) { @@ -1823,9 +1826,11 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) again: list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) { + if (pol->walk.dead) + continue; + dir = xfrm_policy_id2dir(pol->index); - if (pol->walk.dead || - dir >= XFRM_POLICY_MAX || + if (dir >= XFRM_POLICY_MAX || pol->type != type) continue; @@ -1862,9 +1867,11 @@ int xfrm_dev_policy_flush(struct net *net, struct net_device *dev, again: list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) { + if (pol->walk.dead) + continue; + dir = xfrm_policy_id2dir(pol->index); - if (pol->walk.dead || - dir >= XFRM_POLICY_MAX || + if (dir >= XFRM_POLICY_MAX || pol->xdo.dev != dev) continue; -- cgit v1.2.3 From df8fdd01c98b99d04915c04f3a5ce73f55456b7c Mon Sep 17 00:00:00 2001 From: Dharma Balasubiramani Date: Tue, 5 Sep 2023 15:38:35 +0530 Subject: counter: microchip-tcb-capture: Fix the use of internal GCLK logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As per the datasheet, the clock selection Bits 2:0 – TCCLKS[2:0] should be set to 0 while using the internal GCLK (TIMER_CLOCK1). Fixes: 106b104137fd ("counter: Add microchip TCB capture counter") Signed-off-by: Dharma Balasubiramani Link: https://lore.kernel.org/r/20230905100835.315024-1-dharma.b@microchip.com Signed-off-by: William Breathitt Gray --- drivers/counter/microchip-tcb-capture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/counter/microchip-tcb-capture.c b/drivers/counter/microchip-tcb-capture.c index e2d1dc6ca668..c7af13aca36c 100644 --- a/drivers/counter/microchip-tcb-capture.c +++ b/drivers/counter/microchip-tcb-capture.c @@ -98,7 +98,7 @@ static int mchp_tc_count_function_write(struct counter_device *counter, priv->qdec_mode = 0; /* Set highest rate based on whether soc has gclk or not */ bmr &= ~(ATMEL_TC_QDEN | ATMEL_TC_POSEN); - if (priv->tc_cfg->has_gclk) + if (!priv->tc_cfg->has_gclk) cmr |= ATMEL_TC_TIMER_CLOCK2; else cmr |= ATMEL_TC_TIMER_CLOCK1; -- cgit v1.2.3 From f7c4e3e5d4f6609b4725a97451948ca2e425379a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Sep 2023 13:23:03 +0000 Subject: xfrm: interface: use DEV_STATS_INC() syzbot/KCSAN reported data-races in xfrm whenever dev->stats fields are updated. It appears all of these updates can happen from multiple cpus. Adopt SMP safe DEV_STATS_INC() to update dev->stats fields. BUG: KCSAN: data-race in xfrmi_xmit / xfrmi_xmit read-write to 0xffff88813726b160 of 8 bytes by task 23986 on cpu 1: xfrmi_xmit+0x74e/0xb20 net/xfrm/xfrm_interface_core.c:583 __netdev_start_xmit include/linux/netdevice.h:4889 [inline] netdev_start_xmit include/linux/netdevice.h:4903 [inline] xmit_one net/core/dev.c:3544 [inline] dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3560 __dev_queue_xmit+0xeee/0x1de0 net/core/dev.c:4340 dev_queue_xmit include/linux/netdevice.h:3082 [inline] neigh_connected_output+0x231/0x2a0 net/core/neighbour.c:1581 neigh_output include/net/neighbour.h:542 [inline] ip_finish_output2+0x74a/0x850 net/ipv4/ip_output.c:230 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:318 NF_HOOK_COND include/linux/netfilter.h:293 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:432 dst_output include/net/dst.h:458 [inline] ip_local_out net/ipv4/ip_output.c:127 [inline] ip_send_skb+0x72/0xe0 net/ipv4/ip_output.c:1487 udp_send_skb+0x6a4/0x990 net/ipv4/udp.c:963 udp_sendmsg+0x1249/0x12d0 net/ipv4/udp.c:1246 inet_sendmsg+0x63/0x80 net/ipv4/af_inet.c:840 sock_sendmsg_nosec net/socket.c:730 [inline] sock_sendmsg net/socket.c:753 [inline] ____sys_sendmsg+0x37c/0x4d0 net/socket.c:2540 ___sys_sendmsg net/socket.c:2594 [inline] __sys_sendmmsg+0x269/0x500 net/socket.c:2680 __do_sys_sendmmsg net/socket.c:2709 [inline] __se_sys_sendmmsg net/socket.c:2706 [inline] __x64_sys_sendmmsg+0x57/0x60 net/socket.c:2706 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd read-write to 0xffff88813726b160 of 8 bytes by task 23987 on cpu 0: xfrmi_xmit+0x74e/0xb20 net/xfrm/xfrm_interface_core.c:583 __netdev_start_xmit include/linux/netdevice.h:4889 [inline] netdev_start_xmit include/linux/netdevice.h:4903 [inline] xmit_one net/core/dev.c:3544 [inline] dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3560 __dev_queue_xmit+0xeee/0x1de0 net/core/dev.c:4340 dev_queue_xmit include/linux/netdevice.h:3082 [inline] neigh_connected_output+0x231/0x2a0 net/core/neighbour.c:1581 neigh_output include/net/neighbour.h:542 [inline] ip_finish_output2+0x74a/0x850 net/ipv4/ip_output.c:230 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:318 NF_HOOK_COND include/linux/netfilter.h:293 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:432 dst_output include/net/dst.h:458 [inline] ip_local_out net/ipv4/ip_output.c:127 [inline] ip_send_skb+0x72/0xe0 net/ipv4/ip_output.c:1487 udp_send_skb+0x6a4/0x990 net/ipv4/udp.c:963 udp_sendmsg+0x1249/0x12d0 net/ipv4/udp.c:1246 inet_sendmsg+0x63/0x80 net/ipv4/af_inet.c:840 sock_sendmsg_nosec net/socket.c:730 [inline] sock_sendmsg net/socket.c:753 [inline] ____sys_sendmsg+0x37c/0x4d0 net/socket.c:2540 ___sys_sendmsg net/socket.c:2594 [inline] __sys_sendmmsg+0x269/0x500 net/socket.c:2680 __do_sys_sendmmsg net/socket.c:2709 [inline] __se_sys_sendmmsg net/socket.c:2706 [inline] __x64_sys_sendmmsg+0x57/0x60 net/socket.c:2706 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x00000000000010d7 -> 0x00000000000010d8 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 23987 Comm: syz-executor.5 Not tainted 6.5.0-syzkaller-10885-g0468be89b3fa #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023 Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Steffen Klassert Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface_core.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index b86474084690..e21cc71095bb 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -380,8 +380,8 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err) skb->dev = dev; if (err) { - dev->stats.rx_errors++; - dev->stats.rx_dropped++; + DEV_STATS_INC(dev, rx_errors); + DEV_STATS_INC(dev, rx_dropped); return 0; } @@ -426,7 +426,6 @@ static int xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) { struct xfrm_if *xi = netdev_priv(dev); - struct net_device_stats *stats = &xi->dev->stats; struct dst_entry *dst = skb_dst(skb); unsigned int length = skb->len; struct net_device *tdev; @@ -473,7 +472,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) tdev = dst->dev; if (tdev == dev) { - stats->collisions++; + DEV_STATS_INC(dev, collisions); net_warn_ratelimited("%s: Local routing loop detected!\n", dev->name); goto tx_err_dst_release; @@ -512,13 +511,13 @@ xmit: if (net_xmit_eval(err) == 0) { dev_sw_netstats_tx_add(dev, 1, length); } else { - stats->tx_errors++; - stats->tx_aborted_errors++; + DEV_STATS_INC(dev, tx_errors); + DEV_STATS_INC(dev, tx_aborted_errors); } return 0; tx_err_link_failure: - stats->tx_carrier_errors++; + DEV_STATS_INC(dev, tx_carrier_errors); dst_link_failure(skb); tx_err_dst_release: dst_release(dst); @@ -528,7 +527,6 @@ tx_err_dst_release: static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); - struct net_device_stats *stats = &xi->dev->stats; struct dst_entry *dst = skb_dst(skb); struct flowi fl; int ret; @@ -545,7 +543,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) dst = ip6_route_output(dev_net(dev), NULL, &fl.u.ip6); if (dst->error) { dst_release(dst); - stats->tx_carrier_errors++; + DEV_STATS_INC(dev, tx_carrier_errors); goto tx_err; } skb_dst_set(skb, dst); @@ -561,7 +559,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) fl.u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; rt = __ip_route_output_key(dev_net(dev), &fl.u.ip4); if (IS_ERR(rt)) { - stats->tx_carrier_errors++; + DEV_STATS_INC(dev, tx_carrier_errors); goto tx_err; } skb_dst_set(skb, &rt->dst); @@ -580,8 +578,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; tx_err: - stats->tx_errors++; - stats->tx_dropped++; + DEV_STATS_INC(dev, tx_errors); + DEV_STATS_INC(dev, tx_dropped); kfree_skb(skb); return NETDEV_TX_OK; } -- cgit v1.2.3 From 9a85653ed3b9a9b7b31d95a34b64b990c3d33ca1 Mon Sep 17 00:00:00 2001 From: Marcelo Schmitt Date: Thu, 3 Aug 2023 16:56:23 -0300 Subject: iio: dac: ad3552r: Correct device IDs Device IDs for AD3542R and AD3552R were swapped leading to unintended collection of DAC output ranges being used for each design. Change device ID values so they are correct for each DAC chip. Fixes: 8f2b54824b28 ("drivers:iio:dac: Add AD3552R driver support") Signed-off-by: Marcelo Schmitt Reported-by: Chandrakant Minajigi Link: https://lore.kernel.org/r/011f480220799fbfabdd53896f8a2f251ad995ad.1691091324.git.marcelo.schmitt1@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad3552r.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/dac/ad3552r.c b/drivers/iio/dac/ad3552r.c index d5ea1a1be122..a492e8f2fc0f 100644 --- a/drivers/iio/dac/ad3552r.c +++ b/drivers/iio/dac/ad3552r.c @@ -140,8 +140,8 @@ enum ad3552r_ch_vref_select { }; enum ad3542r_id { - AD3542R_ID = 0x4008, - AD3552R_ID = 0x4009, + AD3542R_ID = 0x4009, + AD3552R_ID = 0x4008, }; enum ad3552r_ch_output_range { -- cgit v1.2.3 From 85dfb43bf69281adb1f345dfd9a39faf2e5a718d Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Fri, 11 Aug 2023 16:58:29 +0100 Subject: iio: pressure: bmp280: Fix NULL pointer exception The bmp085 EOC IRQ support is optional, but the driver's common probe function queries the IRQ properties whether or not it exists, which can trigger a NULL pointer exception. Avoid any exception by making the query conditional on the possession of a valid IRQ. Fixes: aae953949651 ("iio: pressure: bmp280: add support for BMP085 EOC interrupt") Signed-off-by: Phil Elwell Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20230811155829.51208-1-phil@raspberrypi.com Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index 6089f3f9d8f4..a2ef1373a274 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -2179,7 +2179,7 @@ int bmp280_common_probe(struct device *dev, * however as it happens, the BMP085 shares the chip ID of BMP180 * so we look for an IRQ if we have that. */ - if (irq > 0 || (chip_id == BMP180_CHIP_ID)) { + if (irq > 0 && (chip_id == BMP180_CHIP_ID)) { ret = bmp085_fetch_eoc_irq(dev, name, irq, data); if (ret) return ret; -- cgit v1.2.3 From 287d998af24326b009ae0956820a3188501b34a0 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Mon, 7 Aug 2023 17:38:05 +0300 Subject: iio: admv1013: add mixer_vgate corner cases Include the corner cases in the computation of the MIXER_VGATE register value. According to the datasheet: The MIXER_VGATE values follows the VCM such as, that for a 0V to 1.8V VCM, MIXER_VGATE = 23.89 VCM + 81, and for a > 1.8V to 2.6V VCM, MIXER_VGATE = 23.75 VCM + 1.25. Fixes: da35a7b526d9 ("iio: frequency: admv1013: add support for ADMV1013") Signed-off-by: Antoniu Miclaus Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20230807143806.6954-1-antoniu.miclaus@analog.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/frequency/admv1013.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/frequency/admv1013.c b/drivers/iio/frequency/admv1013.c index 6355c1f28423..92923074f930 100644 --- a/drivers/iio/frequency/admv1013.c +++ b/drivers/iio/frequency/admv1013.c @@ -351,9 +351,9 @@ static int admv1013_update_mixer_vgate(struct admv1013_state *st) if (vcm < 0) return vcm; - if (vcm < 1800000) + if (vcm <= 1800000) mixer_vgate = (2389 * vcm / 1000000 + 8100) / 100; - else if (vcm > 1800000 && vcm < 2600000) + else if (vcm > 1800000 && vcm <= 2600000) mixer_vgate = (2375 * vcm / 1000000 + 125) / 100; else return -EINVAL; -- cgit v1.2.3 From 3e4bc23926b83c3c67e5f61ae8571602754131a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Sep 2023 18:13:59 +0000 Subject: xfrm: fix a data-race in xfrm_gen_index() xfrm_gen_index() mutual exclusion uses net->xfrm.xfrm_policy_lock. This means we must use a per-netns idx_generator variable, instead of a static one. Alternative would be to use an atomic variable. syzbot reported: BUG: KCSAN: data-race in xfrm_sk_policy_insert / xfrm_sk_policy_insert write to 0xffffffff87005938 of 4 bytes by task 29466 on cpu 0: xfrm_gen_index net/xfrm/xfrm_policy.c:1385 [inline] xfrm_sk_policy_insert+0x262/0x640 net/xfrm/xfrm_policy.c:2347 xfrm_user_policy+0x413/0x540 net/xfrm/xfrm_state.c:2639 do_ipv6_setsockopt+0x1317/0x2ce0 net/ipv6/ipv6_sockglue.c:943 ipv6_setsockopt+0x57/0x130 net/ipv6/ipv6_sockglue.c:1012 rawv6_setsockopt+0x21e/0x410 net/ipv6/raw.c:1054 sock_common_setsockopt+0x61/0x70 net/core/sock.c:3697 __sys_setsockopt+0x1c9/0x230 net/socket.c:2263 __do_sys_setsockopt net/socket.c:2274 [inline] __se_sys_setsockopt net/socket.c:2271 [inline] __x64_sys_setsockopt+0x66/0x80 net/socket.c:2271 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd read to 0xffffffff87005938 of 4 bytes by task 29460 on cpu 1: xfrm_sk_policy_insert+0x13e/0x640 xfrm_user_policy+0x413/0x540 net/xfrm/xfrm_state.c:2639 do_ipv6_setsockopt+0x1317/0x2ce0 net/ipv6/ipv6_sockglue.c:943 ipv6_setsockopt+0x57/0x130 net/ipv6/ipv6_sockglue.c:1012 rawv6_setsockopt+0x21e/0x410 net/ipv6/raw.c:1054 sock_common_setsockopt+0x61/0x70 net/core/sock.c:3697 __sys_setsockopt+0x1c9/0x230 net/socket.c:2263 __do_sys_setsockopt net/socket.c:2274 [inline] __se_sys_setsockopt net/socket.c:2271 [inline] __x64_sys_setsockopt+0x66/0x80 net/socket.c:2271 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x00006ad8 -> 0x00006b18 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 29460 Comm: syz-executor.1 Not tainted 6.5.0-rc5-syzkaller-00243-g9106536c1aa3 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023 Fixes: 1121994c803f ("netns xfrm: policy insertion in netns") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Steffen Klassert Cc: Herbert Xu Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 1 + net/xfrm/xfrm_policy.c | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index bd7c3be4af5d..423b52eca908 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -50,6 +50,7 @@ struct netns_xfrm { struct list_head policy_all; struct hlist_head *policy_byidx; unsigned int policy_idx_hmask; + unsigned int idx_generator; struct hlist_head policy_inexact[XFRM_POLICY_MAX]; struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX]; unsigned int policy_count[XFRM_POLICY_MAX * 2]; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 113fb7e9cdaf..99df2862bdc9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1375,8 +1375,6 @@ EXPORT_SYMBOL(xfrm_policy_hash_rebuild); * of an absolute inpredictability of ordering of rules. This will not pass. */ static u32 xfrm_gen_index(struct net *net, int dir, u32 index) { - static u32 idx_generator; - for (;;) { struct hlist_head *list; struct xfrm_policy *p; @@ -1384,8 +1382,8 @@ static u32 xfrm_gen_index(struct net *net, int dir, u32 index) int found; if (!index) { - idx = (idx_generator | dir); - idx_generator += 8; + idx = (net->xfrm.idx_generator | dir); + net->xfrm.idx_generator += 8; } else { idx = index; index = 0; -- cgit v1.2.3 From 582620d9f6b352552bc9a3316fe2b1c3acd8742d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 18 Aug 2023 15:27:46 +0300 Subject: thunderbolt: Workaround an IOMMU fault on certain systems with Intel Maple Ridge On some systems the IOMMU blocks the first couple of driver ready messages to the connection manager firmware as can be seen in below excerpts: thunderbolt 0000:06:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0010 address=0xbb0e3400 flags=0x0020] or DMAR: DRHD: handling fault status reg 2 DMAR: [DMA Write] Request device [04:00.0] PASID ffffffff fault addr 69974000 [fault reason 05] PTE Write access is not set The reason is unknown and hard to debug because we were not able to reproduce this locally. This only happens on certain systems with Intel Maple Ridge Thunderbolt controller. If there is a device connected when the driver is loaded the issue does not happen either. Only when there is nothing connected (so typically when the system is booted up). We can work this around by sending the driver ready several times. After a couple of retries the message goes through and the controller works just fine. For this reason make the number of retries a parameter for icm_request() and then for Maple Ridge (and Titan Ridge as they us the same function but this should not matter) increase number of retries while shortening the timeout accordingly. Reported-by: Werner Sembach Reported-by: Konrad J Hambrick Reported-by: Calvin Walton Closes: https://bugzilla.kernel.org/show_bug.cgi?id=214259 Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/icm.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c index dbdcad8d73bf..d8b9c734abd3 100644 --- a/drivers/thunderbolt/icm.c +++ b/drivers/thunderbolt/icm.c @@ -41,6 +41,7 @@ #define PHY_PORT_CS1_LINK_STATE_SHIFT 26 #define ICM_TIMEOUT 5000 /* ms */ +#define ICM_RETRIES 3 #define ICM_APPROVE_TIMEOUT 10000 /* ms */ #define ICM_MAX_LINK 4 @@ -296,10 +297,9 @@ static bool icm_copy(struct tb_cfg_request *req, const struct ctl_pkg *pkg) static int icm_request(struct tb *tb, const void *request, size_t request_size, void *response, size_t response_size, size_t npackets, - unsigned int timeout_msec) + int retries, unsigned int timeout_msec) { struct icm *icm = tb_priv(tb); - int retries = 3; do { struct tb_cfg_request *req; @@ -410,7 +410,7 @@ static int icm_fr_get_route(struct tb *tb, u8 link, u8 depth, u64 *route) return -ENOMEM; ret = icm_request(tb, &request, sizeof(request), switches, - sizeof(*switches), npackets, ICM_TIMEOUT); + sizeof(*switches), npackets, ICM_RETRIES, ICM_TIMEOUT); if (ret) goto err_free; @@ -463,7 +463,7 @@ icm_fr_driver_ready(struct tb *tb, enum tb_security_level *security_level, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -488,7 +488,7 @@ static int icm_fr_approve_switch(struct tb *tb, struct tb_switch *sw) memset(&reply, 0, sizeof(reply)); /* Use larger timeout as establishing tunnels can take some time */ ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_APPROVE_TIMEOUT); + 1, ICM_RETRIES, ICM_APPROVE_TIMEOUT); if (ret) return ret; @@ -515,7 +515,7 @@ static int icm_fr_add_switch_key(struct tb *tb, struct tb_switch *sw) memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -543,7 +543,7 @@ static int icm_fr_challenge_switch_key(struct tb *tb, struct tb_switch *sw, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -577,7 +577,7 @@ static int icm_fr_approve_xdomain_paths(struct tb *tb, struct tb_xdomain *xd, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1020,7 +1020,7 @@ icm_tr_driver_ready(struct tb *tb, enum tb_security_level *security_level, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, 20000); + 1, 10, 2000); if (ret) return ret; @@ -1053,7 +1053,7 @@ static int icm_tr_approve_switch(struct tb *tb, struct tb_switch *sw) memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_APPROVE_TIMEOUT); + 1, ICM_RETRIES, ICM_APPROVE_TIMEOUT); if (ret) return ret; @@ -1081,7 +1081,7 @@ static int icm_tr_add_switch_key(struct tb *tb, struct tb_switch *sw) memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1110,7 +1110,7 @@ static int icm_tr_challenge_switch_key(struct tb *tb, struct tb_switch *sw, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1144,7 +1144,7 @@ static int icm_tr_approve_xdomain_paths(struct tb *tb, struct tb_xdomain *xd, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1170,7 +1170,7 @@ static int icm_tr_xdomain_tear_down(struct tb *tb, struct tb_xdomain *xd, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1496,7 +1496,7 @@ icm_ar_driver_ready(struct tb *tb, enum tb_security_level *security_level, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1522,7 +1522,7 @@ static int icm_ar_get_route(struct tb *tb, u8 link, u8 depth, u64 *route) memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1543,7 +1543,7 @@ static int icm_ar_get_boot_acl(struct tb *tb, uuid_t *uuids, size_t nuuids) memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1604,7 +1604,7 @@ static int icm_ar_set_boot_acl(struct tb *tb, const uuid_t *uuids, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; @@ -1626,7 +1626,7 @@ icm_icl_driver_ready(struct tb *tb, enum tb_security_level *security_level, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, 20000); + 1, ICM_RETRIES, 20000); if (ret) return ret; @@ -2298,7 +2298,7 @@ static int icm_usb4_switch_op(struct tb_switch *sw, u16 opcode, u32 *metadata, memset(&reply, 0, sizeof(reply)); ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), - 1, ICM_TIMEOUT); + 1, ICM_RETRIES, ICM_TIMEOUT); if (ret) return ret; -- cgit v1.2.3 From a9fdf5f933a6f2b358fad0194b1287b67f6704b1 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 22 Aug 2023 16:36:18 +0300 Subject: thunderbolt: Check that lane 1 is in CL0 before enabling lane bonding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marek reported that when BlackMagic UltraStudio device is connected the kernel repeatedly tries to enable lane bonding without success making the device non-functional. It looks like the device does not have lane 1 connected at all so even though it is enabled we should not try to bond the lanes. For this reason check that lane 1 is in fact CL0 (connected, active) before attempting to bond the lanes. Reported-by: Marek Šanta Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217737 Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/switch.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 43171cc1cc2d..bd5815f8f23b 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -2725,6 +2725,13 @@ int tb_switch_lane_bonding_enable(struct tb_switch *sw) !tb_port_is_width_supported(down, TB_LINK_WIDTH_DUAL)) return 0; + /* + * Both lanes need to be in CL0. Here we assume lane 0 already be in + * CL0 and check just for lane 1. + */ + if (tb_wait_for_port(down->dual_link_port, false) <= 0) + return -ENOTCONN; + ret = tb_port_lane_bonding_enable(up); if (ret) { tb_port_warn(up, "failed to enable lane bonding\n"); -- cgit v1.2.3 From e19f714ea63f861d95d3d92d45d5fd5ca2e05c8c Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 31 Aug 2023 14:10:46 +0300 Subject: thunderbolt: Correct TMU mode initialization from hardware David reported that cppcheck found following possible copy & paste error from tmu_mode_init(): tmu.c:385:50: style: Expression is always false because 'else if' condition matches previous condition at line 383. [multiCondition] And indeed this is a bug. Fix it to use correct index (TB_SWITCH_TMU_MODE_HIFI_UNI). Reported-by: David Binderman Fixes: d49b4f043d63 ("thunderbolt: Add support for enhanced uni-directional TMU mode") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/tmu.c b/drivers/thunderbolt/tmu.c index 747f88703d5c..11f2aec2a5d3 100644 --- a/drivers/thunderbolt/tmu.c +++ b/drivers/thunderbolt/tmu.c @@ -382,7 +382,7 @@ static int tmu_mode_init(struct tb_switch *sw) } else if (ucap && tb_port_tmu_is_unidirectional(up)) { if (tmu_rates[TB_SWITCH_TMU_MODE_LOWRES] == rate) sw->tmu.mode = TB_SWITCH_TMU_MODE_LOWRES; - else if (tmu_rates[TB_SWITCH_TMU_MODE_LOWRES] == rate) + else if (tmu_rates[TB_SWITCH_TMU_MODE_HIFI_UNI] == rate) sw->tmu.mode = TB_SWITCH_TMU_MODE_HIFI_UNI; } else if (rate) { sw->tmu.mode = TB_SWITCH_TMU_MODE_HIFI_BI; -- cgit v1.2.3 From 308092d080852f8997126e5b3507536162416f4a Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 7 Sep 2023 16:02:30 +0300 Subject: thunderbolt: Restart XDomain discovery handshake after failure Alex reported that after rebooting the other host the peer-to-peer link does not come up anymore. The reason for this is that the host that was not rebooted tries to send the UUID request only 10 times according to the USB4 Inter-Domain spec and gives up if it does not get reply. Then when the other side is actually ready it cannot get the link established anymore. The USB4 Inter-Domain spec requires that the discovery protocol is restarted in that case so implement this now. Reported-by: Alex Balcanquall Fixes: 8e1de7042596 ("thunderbolt: Add support for XDomain lane bonding") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/xdomain.c | 58 ++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c index 5b5566862318..9803f0bbf20d 100644 --- a/drivers/thunderbolt/xdomain.c +++ b/drivers/thunderbolt/xdomain.c @@ -703,6 +703,27 @@ out_unlock: mutex_unlock(&xdomain_lock); } +static void start_handshake(struct tb_xdomain *xd) +{ + xd->state = XDOMAIN_STATE_INIT; + queue_delayed_work(xd->tb->wq, &xd->state_work, + msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT)); +} + +/* Can be called from state_work */ +static void __stop_handshake(struct tb_xdomain *xd) +{ + cancel_delayed_work_sync(&xd->properties_changed_work); + xd->properties_changed_retries = 0; + xd->state_retries = 0; +} + +static void stop_handshake(struct tb_xdomain *xd) +{ + cancel_delayed_work_sync(&xd->state_work); + __stop_handshake(xd); +} + static void tb_xdp_handle_request(struct work_struct *work) { struct xdomain_request_work *xw = container_of(work, typeof(*xw), work); @@ -765,6 +786,15 @@ static void tb_xdp_handle_request(struct work_struct *work) case UUID_REQUEST: tb_dbg(tb, "%llx: received XDomain UUID request\n", route); ret = tb_xdp_uuid_response(ctl, route, sequence, uuid); + /* + * If we've stopped the discovery with an error such as + * timing out, we will restart the handshake now that we + * received UUID request from the remote host. + */ + if (!ret && xd && xd->state == XDOMAIN_STATE_ERROR) { + dev_dbg(&xd->dev, "restarting handshake\n"); + start_handshake(xd); + } break; case LINK_STATE_STATUS_REQUEST: @@ -1521,6 +1551,13 @@ static void tb_xdomain_queue_properties_changed(struct tb_xdomain *xd) msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT)); } +static void tb_xdomain_failed(struct tb_xdomain *xd) +{ + xd->state = XDOMAIN_STATE_ERROR; + queue_delayed_work(xd->tb->wq, &xd->state_work, + msecs_to_jiffies(XDOMAIN_DEFAULT_TIMEOUT)); +} + static void tb_xdomain_state_work(struct work_struct *work) { struct tb_xdomain *xd = container_of(work, typeof(*xd), state_work.work); @@ -1547,7 +1584,7 @@ static void tb_xdomain_state_work(struct work_struct *work) if (ret) { if (ret == -EAGAIN) goto retry_state; - xd->state = XDOMAIN_STATE_ERROR; + tb_xdomain_failed(xd); } else { tb_xdomain_queue_properties_changed(xd); if (xd->bonding_possible) @@ -1612,7 +1649,7 @@ static void tb_xdomain_state_work(struct work_struct *work) if (ret) { if (ret == -EAGAIN) goto retry_state; - xd->state = XDOMAIN_STATE_ERROR; + tb_xdomain_failed(xd); } else { xd->state = XDOMAIN_STATE_ENUMERATED; } @@ -1623,6 +1660,8 @@ static void tb_xdomain_state_work(struct work_struct *work) break; case XDOMAIN_STATE_ERROR: + dev_dbg(&xd->dev, "discovery failed, stopping handshake\n"); + __stop_handshake(xd); break; default: @@ -1833,21 +1872,6 @@ static void tb_xdomain_release(struct device *dev) kfree(xd); } -static void start_handshake(struct tb_xdomain *xd) -{ - xd->state = XDOMAIN_STATE_INIT; - queue_delayed_work(xd->tb->wq, &xd->state_work, - msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT)); -} - -static void stop_handshake(struct tb_xdomain *xd) -{ - cancel_delayed_work_sync(&xd->properties_changed_work); - cancel_delayed_work_sync(&xd->state_work); - xd->properties_changed_retries = 0; - xd->state_retries = 0; -} - static int __maybe_unused tb_xdomain_suspend(struct device *dev) { stop_handshake(tb_to_xdomain(dev)); -- cgit v1.2.3 From bd2767ec3df2775bc336f441f9068a989ccb919d Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 23 Aug 2023 09:59:48 +0530 Subject: coresight: Fix run time warnings while reusing ETR buffer Fix the below warning by avoding calls to tmc_etr_enable_hw, if we are reusing the ETR buffer for multiple sources in sysfs mode. echo 1 > /sys/bus/coresight/devices/tmc_etr0/enable_sink echo 1 > /sys/bus/coresight/devices/ete1/enable_source echo 1 > /sys/bus/coresight/devices/ete2/enable_source [ 166.918290] ------------[ cut here ]------------ [ 166.922905] WARNING: CPU: 4 PID: 2288 at drivers/hwtracing/coresight/coresight-tmc-etr.c:1037 tmc_etr_enable_hw+0xb0/0xc8 [ 166.933862] Modules linked in: [ 166.936911] CPU: 4 PID: 2288 Comm: bash Not tainted 6.5.0-rc7 #132 [ 166.943084] Hardware name: Marvell CN106XX board (DT) [ 166.948127] pstate: 834000c9 (Nzcv daIF +PAN -UAO +TCO +DIT -SSBS BTYPE=--) [ 166.955083] pc : tmc_etr_enable_hw+0xb0/0xc8 [ 166.959345] lr : tmc_enable_etr_sink+0x134/0x210 snip.. 167.038545] Call trace: [ 167.040982] tmc_etr_enable_hw+0xb0/0xc8 [ 167.044897] tmc_enable_etr_sink+0x134/0x210 [ 167.049160] coresight_enable_path+0x160/0x278 [ 167.053596] coresight_enable+0xd4/0x298 [ 167.057510] enable_source_store+0x54/0xa0 [ 167.061598] dev_attr_store+0x20/0x40 [ 167.065254] sysfs_kf_write+0x4c/0x68 [ 167.068909] kernfs_fop_write_iter+0x128/0x200 [ 167.073345] vfs_write+0x1ac/0x2f8 [ 167.076739] ksys_write+0x74/0x110 [ 167.080132] __arm64_sys_write+0x24/0x38 [ 167.084045] invoke_syscall.constprop.0+0x58/0xf8 [ 167.088744] do_el0_svc+0x60/0x160 [ 167.092137] el0_svc+0x40/0x170 [ 167.095273] el0t_64_sync_handler+0x100/0x130 [ 167.099621] el0t_64_sync+0x190/0x198 [ 167.103277] ---[ end trace 0000000000000000 ]--- -bash: echo: write error: Device or resource busy Fixes: 296b01fd106e ("coresight: Refactor out buffer allocation function for ETR") Signed-off-by: Linu Cherian Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230823042948.12879-1-lcherian@marvell.com --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 66dc5f97a009..6132c5b3db9c 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1173,16 +1173,6 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev) goto out; } - /* - * In sysFS mode we can have multiple writers per sink. Since this - * sink is already enabled no memory is needed and the HW need not be - * touched, even if the buffer size has changed. - */ - if (drvdata->mode == CS_MODE_SYSFS) { - atomic_inc(&csdev->refcnt); - goto out; - } - /* * If we don't have a buffer or it doesn't match the requested size, * use the buffer allocated above. Otherwise reuse the existing buffer. @@ -1204,7 +1194,7 @@ out: static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) { - int ret; + int ret = 0; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); struct etr_buf *sysfs_buf = tmc_etr_get_sysfs_buffer(csdev); @@ -1213,12 +1203,24 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) return PTR_ERR(sysfs_buf); spin_lock_irqsave(&drvdata->spinlock, flags); + + /* + * In sysFS mode we can have multiple writers per sink. Since this + * sink is already enabled no memory is needed and the HW need not be + * touched, even if the buffer size has changed. + */ + if (drvdata->mode == CS_MODE_SYSFS) { + atomic_inc(&csdev->refcnt); + goto out; + } + ret = tmc_etr_enable_hw(drvdata, sysfs_buf); if (!ret) { drvdata->mode = CS_MODE_SYSFS; atomic_inc(&csdev->refcnt); } +out: spin_unlock_irqrestore(&drvdata->spinlock, flags); if (!ret) -- cgit v1.2.3 From e28a0974d749e5105d77233c0a84d35c37da047e Mon Sep 17 00:00:00 2001 From: Max Nguyen Date: Sun, 17 Sep 2023 22:21:53 -0700 Subject: Input: xpad - add HyperX Clutch Gladiate Support Add HyperX controller support to xpad_device and xpad_table. Suggested-by: Chris Toledanes Reviewed-by: Carl Ng Signed-off-by: Max Nguyen Reviewed-by: Rahul Rameshbabu Link: https://lore.kernel.org/r/20230906231514.4291-1-hphyperxdev@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index ede380551e55..b22f9c6e7fa5 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -130,6 +130,7 @@ static const struct xpad_device { { 0x0079, 0x18d4, "GPD Win 2 X-Box Controller", 0, XTYPE_XBOX360 }, { 0x03eb, 0xff01, "Wooting One (Legacy)", 0, XTYPE_XBOX360 }, { 0x03eb, 0xff02, "Wooting Two (Legacy)", 0, XTYPE_XBOX360 }, + { 0x03f0, 0x0495, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, { 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX }, @@ -459,6 +460,7 @@ static const struct usb_device_id xpad_table[] = { { USB_INTERFACE_INFO('X', 'B', 0) }, /* Xbox USB-IF not-approved class */ XPAD_XBOX360_VENDOR(0x0079), /* GPD Win 2 controller */ XPAD_XBOX360_VENDOR(0x03eb), /* Wooting Keyboards (Legacy) */ + XPAD_XBOXONE_VENDOR(0x03f0), /* HP HyperX Xbox One controllers */ XPAD_XBOX360_VENDOR(0x044f), /* Thrustmaster Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x045e), /* Microsoft Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x045e), /* Microsoft Xbox One controllers */ -- cgit v1.2.3 From c1ed72171ed580fbf159e703b77685aa4b0d0df5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Sep 2023 12:08:27 +0200 Subject: ACPI: resource: Skip IRQ override on ASUS ExpertBook B1402CBA Like various other ASUS ExpertBook-s, the ASUS ExpertBook B1402CBA has an ACPI DSDT table that describes IRQ 1 as ActiveLow while the kernel overrides it to EdgeHigh. This prevents the keyboard from working. To fix this issue, add this laptop to the skip_override_table so that the kernel does not override IRQ 1. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217901 Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 32cfa3f4efd3..8116b55b6c98 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -439,6 +439,13 @@ static const struct dmi_system_id asus_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "S5602ZA"), }, }, + { + .ident = "Asus ExpertBook B1402CBA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "B1402CBA"), + }, + }, { .ident = "Asus ExpertBook B1502CBA", .matches = { -- cgit v1.2.3 From b06fab003ae181c6690fe6d1f806636f816f4e50 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 15 Sep 2023 12:01:13 +0200 Subject: riscv: kselftests: Fix mm build by removing testcases subdirectory kselftests fails to build because the mm/testcases subdirectory is not created and then the compiler fails to output the binary there. So fix this by simply removing this subdirectory which is not very useful. Signed-off-by: Alexandre Ghiti Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20230915100113.13131-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- tools/testing/selftests/riscv/mm/Makefile | 6 +- tools/testing/selftests/riscv/mm/mmap_bottomup.c | 35 ++++++++++++ tools/testing/selftests/riscv/mm/mmap_default.c | 35 ++++++++++++ tools/testing/selftests/riscv/mm/mmap_test.h | 64 ++++++++++++++++++++++ tools/testing/selftests/riscv/mm/run_mmap.sh | 12 ++++ .../selftests/riscv/mm/testcases/mmap_bottomup.c | 35 ------------ .../selftests/riscv/mm/testcases/mmap_default.c | 35 ------------ .../selftests/riscv/mm/testcases/mmap_test.h | 64 ---------------------- .../selftests/riscv/mm/testcases/run_mmap.sh | 12 ---- 9 files changed, 149 insertions(+), 149 deletions(-) create mode 100644 tools/testing/selftests/riscv/mm/mmap_bottomup.c create mode 100644 tools/testing/selftests/riscv/mm/mmap_default.c create mode 100644 tools/testing/selftests/riscv/mm/mmap_test.h create mode 100755 tools/testing/selftests/riscv/mm/run_mmap.sh delete mode 100644 tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c delete mode 100644 tools/testing/selftests/riscv/mm/testcases/mmap_default.c delete mode 100644 tools/testing/selftests/riscv/mm/testcases/mmap_test.h delete mode 100755 tools/testing/selftests/riscv/mm/testcases/run_mmap.sh diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile index 11e0f0568923..c333263f2b27 100644 --- a/tools/testing/selftests/riscv/mm/Makefile +++ b/tools/testing/selftests/riscv/mm/Makefile @@ -5,11 +5,11 @@ # Additional include paths needed by kselftest.h and local headers CFLAGS += -D_GNU_SOURCE -std=gnu99 -I. -TEST_GEN_FILES := testcases/mmap_default testcases/mmap_bottomup +TEST_GEN_FILES := mmap_default mmap_bottomup -TEST_PROGS := testcases/run_mmap.sh +TEST_PROGS := run_mmap.sh include ../../lib.mk -$(OUTPUT)/mm: testcases/mmap_default.c testcases/mmap_bottomup.c testcases/mmap_tests.h +$(OUTPUT)/mm: mmap_default.c mmap_bottomup.c mmap_tests.h $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^ diff --git a/tools/testing/selftests/riscv/mm/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/mmap_bottomup.c new file mode 100644 index 000000000000..1757d19ca89b --- /dev/null +++ b/tools/testing/selftests/riscv/mm/mmap_bottomup.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include + +#include "../../kselftest_harness.h" + +TEST(infinite_rlimit) +{ +// Only works on 64 bit +#if __riscv_xlen == 64 + struct addresses mmap_addresses; + + EXPECT_EQ(BOTTOM_UP, memory_layout()); + + do_mmaps(&mmap_addresses); + + EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr); + + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr); + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr); + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr); + EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr); +#endif +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/mm/mmap_default.c b/tools/testing/selftests/riscv/mm/mmap_default.c new file mode 100644 index 000000000000..c63c60b9397e --- /dev/null +++ b/tools/testing/selftests/riscv/mm/mmap_default.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include + +#include "../../kselftest_harness.h" + +TEST(default_rlimit) +{ +// Only works on 64 bit +#if __riscv_xlen == 64 + struct addresses mmap_addresses; + + EXPECT_EQ(TOP_DOWN, memory_layout()); + + do_mmaps(&mmap_addresses); + + EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr); + + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr); + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr); + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr); + EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr); +#endif +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/mm/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h new file mode 100644 index 000000000000..9b8434f62f57 --- /dev/null +++ b/tools/testing/selftests/riscv/mm/mmap_test.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _TESTCASES_MMAP_TEST_H +#define _TESTCASES_MMAP_TEST_H +#include +#include +#include + +#define TOP_DOWN 0 +#define BOTTOM_UP 1 + +struct addresses { + int *no_hint; + int *on_37_addr; + int *on_38_addr; + int *on_46_addr; + int *on_47_addr; + int *on_55_addr; + int *on_56_addr; +}; + +static inline void do_mmaps(struct addresses *mmap_addresses) +{ + /* + * Place all of the hint addresses on the boundaries of mmap + * sv39, sv48, sv57 + * User addresses end at 1<<38, 1<<47, 1<<56 respectively + */ + void *on_37_bits = (void *)(1UL << 37); + void *on_38_bits = (void *)(1UL << 38); + void *on_46_bits = (void *)(1UL << 46); + void *on_47_bits = (void *)(1UL << 47); + void *on_55_bits = (void *)(1UL << 55); + void *on_56_bits = (void *)(1UL << 56); + + int prot = PROT_READ | PROT_WRITE; + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + + mmap_addresses->no_hint = + mmap(NULL, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_37_addr = + mmap(on_37_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_38_addr = + mmap(on_38_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_46_addr = + mmap(on_46_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_47_addr = + mmap(on_47_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_55_addr = + mmap(on_55_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_56_addr = + mmap(on_56_bits, 5 * sizeof(int), prot, flags, 0, 0); +} + +static inline int memory_layout(void) +{ + int prot = PROT_READ | PROT_WRITE; + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + + void *value1 = mmap(NULL, sizeof(int), prot, flags, 0, 0); + void *value2 = mmap(NULL, sizeof(int), prot, flags, 0, 0); + + return value2 > value1; +} +#endif /* _TESTCASES_MMAP_TEST_H */ diff --git a/tools/testing/selftests/riscv/mm/run_mmap.sh b/tools/testing/selftests/riscv/mm/run_mmap.sh new file mode 100755 index 000000000000..ca5ad7c48bad --- /dev/null +++ b/tools/testing/selftests/riscv/mm/run_mmap.sh @@ -0,0 +1,12 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +original_stack_limit=$(ulimit -s) + +./mmap_default + +# Force mmap_bottomup to be ran with bottomup memory due to +# the unlimited stack +ulimit -s unlimited +./mmap_bottomup +ulimit -s $original_stack_limit diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c deleted file mode 100644 index b29379f7e478..000000000000 --- a/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include -#include - -#include "../../kselftest_harness.h" - -TEST(infinite_rlimit) -{ -// Only works on 64 bit -#if __riscv_xlen == 64 - struct addresses mmap_addresses; - - EXPECT_EQ(BOTTOM_UP, memory_layout()); - - do_mmaps(&mmap_addresses); - - EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr); - - EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint); - EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr); - EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr); - EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr); - EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr); - EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr); - EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr); -#endif -} - -TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_default.c b/tools/testing/selftests/riscv/mm/testcases/mmap_default.c deleted file mode 100644 index d1accb91b726..000000000000 --- a/tools/testing/selftests/riscv/mm/testcases/mmap_default.c +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include -#include - -#include "../../kselftest_harness.h" - -TEST(default_rlimit) -{ -// Only works on 64 bit -#if __riscv_xlen == 64 - struct addresses mmap_addresses; - - EXPECT_EQ(TOP_DOWN, memory_layout()); - - do_mmaps(&mmap_addresses); - - EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr); - EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr); - - EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint); - EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr); - EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr); - EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr); - EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr); - EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr); - EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr); -#endif -} - -TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_test.h b/tools/testing/selftests/riscv/mm/testcases/mmap_test.h deleted file mode 100644 index 9b8434f62f57..000000000000 --- a/tools/testing/selftests/riscv/mm/testcases/mmap_test.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _TESTCASES_MMAP_TEST_H -#define _TESTCASES_MMAP_TEST_H -#include -#include -#include - -#define TOP_DOWN 0 -#define BOTTOM_UP 1 - -struct addresses { - int *no_hint; - int *on_37_addr; - int *on_38_addr; - int *on_46_addr; - int *on_47_addr; - int *on_55_addr; - int *on_56_addr; -}; - -static inline void do_mmaps(struct addresses *mmap_addresses) -{ - /* - * Place all of the hint addresses on the boundaries of mmap - * sv39, sv48, sv57 - * User addresses end at 1<<38, 1<<47, 1<<56 respectively - */ - void *on_37_bits = (void *)(1UL << 37); - void *on_38_bits = (void *)(1UL << 38); - void *on_46_bits = (void *)(1UL << 46); - void *on_47_bits = (void *)(1UL << 47); - void *on_55_bits = (void *)(1UL << 55); - void *on_56_bits = (void *)(1UL << 56); - - int prot = PROT_READ | PROT_WRITE; - int flags = MAP_PRIVATE | MAP_ANONYMOUS; - - mmap_addresses->no_hint = - mmap(NULL, 5 * sizeof(int), prot, flags, 0, 0); - mmap_addresses->on_37_addr = - mmap(on_37_bits, 5 * sizeof(int), prot, flags, 0, 0); - mmap_addresses->on_38_addr = - mmap(on_38_bits, 5 * sizeof(int), prot, flags, 0, 0); - mmap_addresses->on_46_addr = - mmap(on_46_bits, 5 * sizeof(int), prot, flags, 0, 0); - mmap_addresses->on_47_addr = - mmap(on_47_bits, 5 * sizeof(int), prot, flags, 0, 0); - mmap_addresses->on_55_addr = - mmap(on_55_bits, 5 * sizeof(int), prot, flags, 0, 0); - mmap_addresses->on_56_addr = - mmap(on_56_bits, 5 * sizeof(int), prot, flags, 0, 0); -} - -static inline int memory_layout(void) -{ - int prot = PROT_READ | PROT_WRITE; - int flags = MAP_PRIVATE | MAP_ANONYMOUS; - - void *value1 = mmap(NULL, sizeof(int), prot, flags, 0, 0); - void *value2 = mmap(NULL, sizeof(int), prot, flags, 0, 0); - - return value2 > value1; -} -#endif /* _TESTCASES_MMAP_TEST_H */ diff --git a/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh b/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh deleted file mode 100755 index ca5ad7c48bad..000000000000 --- a/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -original_stack_limit=$(ulimit -s) - -./mmap_default - -# Force mmap_bottomup to be ran with bottomup memory due to -# the unlimited stack -ulimit -s unlimited -./mmap_bottomup -ulimit -s $original_stack_limit -- cgit v1.2.3 From e5028011885a85032aa3c1b7e3e493bcdacb4a0a Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 17 Aug 2023 17:19:51 +0100 Subject: coresight: tmc-etr: Disable warnings for allocation failures Running the following command on Juno triggers the warning: $ perf record -e cs_etm// -m ,128M ... ------------[ cut here ]------------ WARNING: CPU: 1 PID: 412 at mm/page_alloc.c:4453 __alloc_pages+0x334/0x1420 CPU: 1 PID: 412 Comm: perf Not tainted 6.5.0-rc3+ #181 Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform, BIOS EDK II Feb 1 2019 pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __alloc_pages+0x334/0x1420 lr : dma_common_alloc_pages+0x108/0x138 sp : ffffffc087fb7440 x29: ffffffc087fb7440 x28: 0000000000000000 x27: ffffffc07e48fba0 x26: 0000000000000001 x25: 000000000000000f x24: ffffffc081f24880 x23: 0000000000000cc0 x22: ffffff88012b6f08 x21: 0000000008000000 x20: ffffff8801433000 x19: 0000000000000000 x18: 0000000000000000 x17: ffffffc080316e5c x16: ffffffc07e46406c x15: ffffffc0803af580 x14: ffffffc08036b460 x13: ffffffc080025cbc x12: ffffffb8108c3fc4 x11: 1ffffff8108c3fc3 x10: 1ffffff810ff6eac x9 : 00000000f204f204 x8 : 000000000000f204 x7 : 00000000f2f2f2f2 x6 : 00000000f3f3f3f3 x5 : 0000000000000001 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 0000000000000cc0 x1 : 0000000000000000 x0 : ffffffc085333000 Call trace: __alloc_pages+0x334/0x1420 dma_common_alloc_pages+0x108/0x138 __dma_alloc_pages+0xf4/0x108 dma_alloc_pages+0x18/0x30 tmc_etr_alloc_flat_buf+0xa0/0x190 [coresight_tmc] tmc_alloc_etr_buf.constprop.0+0x124/0x298 [coresight_tmc] alloc_etr_buf.constprop.0.isra.0+0x88/0xc8 [coresight_tmc] tmc_alloc_etr_buffer+0x164/0x2f0 [coresight_tmc] etm_setup_aux+0x32c/0x520 [coresight] rb_alloc_aux+0x29c/0x3f8 perf_mmap+0x59c/0xce0 mmap_region+0x340/0x10e0 do_mmap+0x48c/0x580 vm_mmap_pgoff+0x160/0x248 ksys_mmap_pgoff+0x1e8/0x278 __arm64_sys_mmap+0x8c/0xb8 With the flat mode, we only attempt to allocate large memory if there is an IOMMU connected to the ETR. If the allocation fails, we always have a fallback path and return an error if nothing else worked. So, suppress the warning for flat mode allocations. Cc: Mike Leach Cc: James Clark Cc: Anshuman Khandual Signed-off-by: Suzuki K Poulose Reviewed-by: James Clark Link: https://lore.kernel.org/r/20230817161951.658534-1-suzuki.poulose@arm.com --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 6132c5b3db9c..8311e1028ddb 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -610,7 +610,8 @@ static int tmc_etr_alloc_flat_buf(struct tmc_drvdata *drvdata, flat_buf->vaddr = dma_alloc_noncoherent(real_dev, etr_buf->size, &flat_buf->daddr, - DMA_FROM_DEVICE, GFP_KERNEL); + DMA_FROM_DEVICE, + GFP_KERNEL | __GFP_NOWARN); if (!flat_buf->vaddr) { kfree(flat_buf); return -ENOMEM; -- cgit v1.2.3 From cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 15 Sep 2023 19:20:41 +0800 Subject: xfrm6: fix inet6_dev refcount underflow problem There are race conditions that may lead to inet6_dev refcount underflow in xfrm6_dst_destroy() and rt6_uncached_list_flush_dev(). One of the refcount underflow bugs is shown below: (cpu 1) | (cpu 2) xfrm6_dst_destroy() | ... | in6_dev_put() | | rt6_uncached_list_flush_dev() ... | ... | in6_dev_put() rt6_uncached_list_del() | ... ... | xfrm6_dst_destroy() calls rt6_uncached_list_del() after in6_dev_put(), so rt6_uncached_list_flush_dev() has a chance to call in6_dev_put() again for the same inet6_dev. Fix it by moving in6_dev_put() after rt6_uncached_list_del() in xfrm6_dst_destroy(). Fixes: 510c321b5571 ("xfrm: reuse uncached_list to track xdsts") Signed-off-by: Zhang Changzhong Reviewed-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 188224a76685..45d0f9a8b28c 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -117,10 +117,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - if (likely(xdst->u.rt6.rt6i_idev)) - in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); rt6_uncached_list_del(&xdst->u.rt6); + if (likely(xdst->u.rt6.rt6i_idev)) + in6_dev_put(xdst->u.rt6.rt6i_idev); xfrm_dst_destroy(xdst); } -- cgit v1.2.3 From 9f564b92cf6d0ecb398f9348600a7d8a7f8ea804 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 12 Sep 2023 08:56:19 +0200 Subject: riscv: Only consider swbp/ss handlers for correct privileged mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RISC-V software breakpoint trap handlers are used for {k,u}probes. When trapping from kernelmode, only the kernelmode handlers should be considered. Vice versa, only usermode handlers for usermode traps. This is not the case on RISC-V, which can trigger a bug if a userspace process uses uprobes, and a WARN() is triggered from kernelmode (which is implemented via {c.,}ebreak). The kernel will trap on the kernelmode {c.,}ebreak, look for uprobes handlers, realize incorrectly that uprobes need to be handled, and exit the trap handler early. The trap returns to re-executing the {c.,}ebreak, and enter an infinite trap-loop. The issue was found running the BPF selftest [1]. Fix this issue by only considering the swbp/ss handlers for kernel/usermode respectively. Also, move CONFIG ifdeffery from traps.c to the asm/{k,u}probes.h headers. Note that linux/uprobes.h only include asm/uprobes.h if CONFIG_UPROBES is defined, which is why asm/uprobes.h needs to be unconditionally included in traps.c Link: https://lore.kernel.org/linux-riscv/87v8d19aun.fsf@all.your.base.are.belong.to.us/ # [1] Fixes: 74784081aac8 ("riscv: Add uprobes supported") Reviewed-by: Guo Ren Reviewed-by: Nam Cao Tested-by: Puranjay Mohan Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20230912065619.62020-1-bjorn@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kprobes.h | 11 ++++++++++- arch/riscv/include/asm/uprobes.h | 13 ++++++++++++- arch/riscv/kernel/traps.c | 28 ++++++++++++++++++---------- 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h index e7882ccb0fd4..78ea44f76718 100644 --- a/arch/riscv/include/asm/kprobes.h +++ b/arch/riscv/include/asm/kprobes.h @@ -40,6 +40,15 @@ void arch_remove_kprobe(struct kprobe *p); int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr); bool kprobe_breakpoint_handler(struct pt_regs *regs); bool kprobe_single_step_handler(struct pt_regs *regs); - +#else +static inline bool kprobe_breakpoint_handler(struct pt_regs *regs) +{ + return false; +} + +static inline bool kprobe_single_step_handler(struct pt_regs *regs) +{ + return false; +} #endif /* CONFIG_KPROBES */ #endif /* _ASM_RISCV_KPROBES_H */ diff --git a/arch/riscv/include/asm/uprobes.h b/arch/riscv/include/asm/uprobes.h index f2183e00fdd2..3fc7deda9190 100644 --- a/arch/riscv/include/asm/uprobes.h +++ b/arch/riscv/include/asm/uprobes.h @@ -34,7 +34,18 @@ struct arch_uprobe { bool simulate; }; +#ifdef CONFIG_UPROBES bool uprobe_breakpoint_handler(struct pt_regs *regs); bool uprobe_single_step_handler(struct pt_regs *regs); - +#else +static inline bool uprobe_breakpoint_handler(struct pt_regs *regs) +{ + return false; +} + +static inline bool uprobe_single_step_handler(struct pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_UPROBES */ #endif /* _ASM_RISCV_UPROBES_H */ diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 19807c4d3805..fae8f610d867 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include #include @@ -247,22 +249,28 @@ static inline unsigned long get_break_insn_length(unsigned long pc) return GET_INSN_LENGTH(insn); } +static bool probe_single_step_handler(struct pt_regs *regs) +{ + bool user = user_mode(regs); + + return user ? uprobe_single_step_handler(regs) : kprobe_single_step_handler(regs); +} + +static bool probe_breakpoint_handler(struct pt_regs *regs) +{ + bool user = user_mode(regs); + + return user ? uprobe_breakpoint_handler(regs) : kprobe_breakpoint_handler(regs); +} + void handle_break(struct pt_regs *regs) { -#ifdef CONFIG_KPROBES - if (kprobe_single_step_handler(regs)) + if (probe_single_step_handler(regs)) return; - if (kprobe_breakpoint_handler(regs)) - return; -#endif -#ifdef CONFIG_UPROBES - if (uprobe_single_step_handler(regs)) + if (probe_breakpoint_handler(regs)) return; - if (uprobe_breakpoint_handler(regs)) - return; -#endif current->thread.bad_cause = regs->cause; if (user_mode(regs)) -- cgit v1.2.3 From 0c1a2e69bcb506f48ebf94bd199bab0b93f66da2 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Tue, 8 Aug 2023 15:19:50 -0700 Subject: drm/msm/dp: do not reinitialize phy unless retry during link training DP PHY re-initialization done using dp_ctrl_reinitialize_mainlink() will cause PLL unlocked initially and then PLL gets locked at the end of initialization. PLL_UNLOCKED interrupt will fire during this time if the interrupt mask is enabled. However currently DP driver link training implementation incorrectly re-initializes PHY unconditionally during link training as the PHY was already configured in dp_ctrl_enable_mainlink_clocks(). Fix this by re-initializing the PHY only if the previous link training failed. [drm:dp_aux_isr] *ERROR* Unexpected DP AUX IRQ 0x01000000 when not busy Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Closes: https://gitlab.freedesktop.org/drm/msm/-/issues/30 Signed-off-by: Kuogee Hsieh Tested-by: Abhinav Kumar # sc7280 Reviewed-by: Abhinav Kumar Reviewed-by: Stephen Boyd Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/551847/ Link: https://lore.kernel.org/r/1691533190-19335-1-git-send-email-quic_khsieh@quicinc.com [quic_abhinavk@quicinc.com: added line break in commit text] Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dp/dp_ctrl.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index a7a5c7e0ab92..77a8d9366ed7 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1774,13 +1774,6 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) return rc; while (--link_train_max_retries) { - rc = dp_ctrl_reinitialize_mainlink(ctrl); - if (rc) { - DRM_ERROR("Failed to reinitialize mainlink. rc=%d\n", - rc); - break; - } - training_step = DP_TRAINING_NONE; rc = dp_ctrl_setup_main_link(ctrl, &training_step); if (rc == 0) { @@ -1832,6 +1825,12 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) /* stop link training before start re training */ dp_ctrl_clear_training_pattern(ctrl); } + + rc = dp_ctrl_reinitialize_mainlink(ctrl); + if (rc) { + DRM_ERROR("Failed to reinitialize mainlink. rc=%d\n", rc); + break; + } } if (ctrl->link->sink_request & DP_TEST_LINK_PHY_TEST_PATTERN) -- cgit v1.2.3 From c0666d1dc6816e74192600e154517d7943b917fe Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 5 Sep 2023 20:43:48 +0300 Subject: drm/msm/mdss: fix highest-bank-bit for msm8998 According to the vendor DT files, msm8998 has highest-bank-bit equal to 2. Update the data accordingly. Fixes: 6f410b246209 ("drm/msm/mdss: populate missing data") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/555840/ Link: https://lore.kernel.org/r/20230905174353.3118648-2-dmitry.baryshkov@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/msm_mdss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 2e87dd6cb17b..348c66b14683 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -511,7 +511,7 @@ static int mdss_remove(struct platform_device *pdev) static const struct msm_mdss_data msm8998_data = { .ubwc_enc_version = UBWC_1_0, .ubwc_dec_version = UBWC_1_0, - .highest_bank_bit = 1, + .highest_bank_bit = 2, }; static const struct msm_mdss_data qcm2290_data = { -- cgit v1.2.3 From ab483e3adcc178254eb1ce0fbdfbea65f86f1006 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Fri, 15 Sep 2023 13:44:25 -0700 Subject: drm/msm/dsi: skip the wait for video mode done if not applicable dsi_wait4video_done() API waits for the DSI video mode engine to become idle so that we can transmit the DCS commands in the beginning of BLLP. However, with the current sequence, the MDP timing engine is turned on after the panel's pre_enable() callback which can send out the DCS commands needed to power up the panel. During those cases, this API will always timeout and print out the error spam leading to long bootup times and log flooding. Fix this by checking if the DSI video engine was actually busy before waiting for it to become idle otherwise this is a redundant wait. changes in v2: - move the reg read below the video mode check - minor fixes in commit text Closes: https://gitlab.freedesktop.org/drm/msm/-/issues/34 Fixes: a689554ba6ed ("drm/msm: Initial add DSI connector support") Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/557853/ Link: https://lore.kernel.org/r/20230915204426.19011-1-quic_abhinavk@quicinc.com --- drivers/gpu/drm/msm/dsi/dsi_host.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 5d9ec27c89d3..4da450742302 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -1082,9 +1082,21 @@ static void dsi_wait4video_done(struct msm_dsi_host *msm_host) static void dsi_wait4video_eng_busy(struct msm_dsi_host *msm_host) { + u32 data; + if (!(msm_host->mode_flags & MIPI_DSI_MODE_VIDEO)) return; + data = dsi_read(msm_host, REG_DSI_STATUS0); + + /* if video mode engine is not busy, its because + * either timing engine was not turned on or the + * DSI controller has finished transmitting the video + * data already, so no need to wait in those cases + */ + if (!(data & DSI_STATUS0_VIDEO_MODE_ENGINE_BUSY)) + return; + if (msm_host->power_on && msm_host->enabled) { dsi_wait4video_done(msm_host); /* delay 4 ms to skip BLLP */ -- cgit v1.2.3 From 6a1d4c7976dd1ee7c9f80bc8e62801ec7b1f2f58 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 15 Sep 2023 15:59:40 +0300 Subject: drm/msm/dsi: fix irq_of_parse_and_map() error checking The irq_of_parse_and_map() function returns zero on error. It never returns negative error codes. Fix the check. Fixes: a689554ba6ed ("drm/msm: Initial add DSI connector support") Signed-off-by: Dan Carpenter Reviewed-by: Konrad Dybcio Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/557715/ Link: https://lore.kernel.org/r/4f3c5c98-04f7-43f7-900f-5d7482c83eef@moroto.mountain Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dsi/dsi_host.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 4da450742302..3d6fb708dc22 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -1906,10 +1906,9 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi) } msm_host->irq = irq_of_parse_and_map(pdev->dev.of_node, 0); - if (msm_host->irq < 0) { - ret = msm_host->irq; - dev_err(&pdev->dev, "failed to get irq: %d\n", ret); - return ret; + if (!msm_host->irq) { + dev_err(&pdev->dev, "failed to get irq\n"); + return -EINVAL; } /* do not autoenable, will be enabled later */ -- cgit v1.2.3 From 95e681ca3b65e4ce3d2537b47672d787b7d30375 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Thu, 7 Sep 2023 18:26:16 -0700 Subject: drm/msm/dpu: change _dpu_plane_calc_bw() to use u64 to avoid overflow _dpu_plane_calc_bw() uses integer variables to calculate the bandwidth used during plane bandwidth calculations. However for high resolution displays this overflows easily and leads to below errors [dpu error]crtc83 failed performance check -7 Promote the intermediate variables to u64 to avoid overflow. changes in v2: - change to u64 where actually needed in the math Fixes: c33b7c0389e1 ("drm/msm/dpu: add support for clk and bw scaling for display") Reviewed-by: Dmitry Baryshkov Reported-by: Nia Espera Closes: https://gitlab.freedesktop.org/drm/msm/-/issues/32 Tested-by: Nia Espera Patchwork: https://patchwork.freedesktop.org/patch/556288/ Link: https://lore.kernel.org/r/20230908012616.20654-1-quic_abhinavk@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index c2aaaded07ed..98c1b22e9bca 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -119,6 +119,7 @@ static u64 _dpu_plane_calc_bw(const struct dpu_mdss_cfg *catalog, struct dpu_sw_pipe_cfg *pipe_cfg) { int src_width, src_height, dst_height, fps; + u64 plane_pixel_rate, plane_bit_rate; u64 plane_prefill_bw; u64 plane_bw; u32 hw_latency_lines; @@ -136,13 +137,12 @@ static u64 _dpu_plane_calc_bw(const struct dpu_mdss_cfg *catalog, scale_factor = src_height > dst_height ? mult_frac(src_height, 1, dst_height) : 1; - plane_bw = - src_width * mode->vtotal * fps * fmt->bpp * - scale_factor; + plane_pixel_rate = src_width * mode->vtotal * fps; + plane_bit_rate = plane_pixel_rate * fmt->bpp; - plane_prefill_bw = - src_width * hw_latency_lines * fps * fmt->bpp * - scale_factor * mode->vtotal; + plane_bw = plane_bit_rate * scale_factor; + + plane_prefill_bw = plane_bw * hw_latency_lines; if ((vbp+vpw) > hw_latency_lines) do_div(plane_prefill_bw, (vbp+vpw)); -- cgit v1.2.3 From eba8c99a0fc45da1c8d5b5f5bd1dc2e79229a767 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 25 Aug 2023 16:01:08 -0700 Subject: drm/msm/dp: Add newlines to debug printks These debug printks are missing newlines, causing drm debug logs to be hard to read. Add newlines so that the messages are on their own line. Cc: Kuogee Hsieh Cc: Vinod Polimera Signed-off-by: Stephen Boyd Fixes: 601f0479c583 ("drm/msm/dp: add logs across DP driver for ease of debugging") Fixes: cd779808cccd ("drm/msm/dp: Add basic PSR support for eDP") Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/554533/ Link: https://lore.kernel.org/r/20230825230109.2264345-1-swboyd@chromium.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dp/dp_link.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c index 42427129acea..6375daaeb98e 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.c +++ b/drivers/gpu/drm/msm/dp/dp_link.c @@ -1090,7 +1090,7 @@ int dp_link_process_request(struct dp_link *dp_link) } else if (dp_link_read_psr_error_status(link)) { DRM_ERROR("PSR IRQ_HPD received\n"); } else if (dp_link_psr_capability_changed(link)) { - drm_dbg_dp(link->drm_dev, "PSR Capability changed"); + drm_dbg_dp(link->drm_dev, "PSR Capability changed\n"); } else { ret = dp_link_process_link_status_update(link); if (!ret) { @@ -1107,7 +1107,7 @@ int dp_link_process_request(struct dp_link *dp_link) } } - drm_dbg_dp(link->drm_dev, "sink request=%#x", + drm_dbg_dp(link->drm_dev, "sink request=%#x\n", dp_link->sink_request); return ret; } -- cgit v1.2.3 From 3b6c4a11bf2b810f772f5c2c1ef6eef3fc268246 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 20 Sep 2023 17:04:01 +0100 Subject: soundwire: bus: Make IRQ handling conditionally built SoundWire has provisions for a simple callback for the IRQ handling so has no hard dependency on IRQ_DOMAIN, but the recent addition of IRQ handling was causing builds without IRQ_DOMAIN to fail. Resolve this by moving the IRQ handling into its own file and only add it to the build when IRQ_DOMAIN is included in the kernel. Fixes: 12a95123bfe1 ("soundwire: bus: Allow SoundWire peripherals to register IRQ handlers") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309150522.MoKeF4jx-lkp@intel.com/ Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20230920160401.854052-1-ckeepax@opensource.cirrus.com Signed-off-by: Vinod Koul --- drivers/soundwire/Makefile | 4 +++ drivers/soundwire/bus.c | 31 ++++------------------- drivers/soundwire/bus_type.c | 11 +++------ drivers/soundwire/irq.c | 59 ++++++++++++++++++++++++++++++++++++++++++++ drivers/soundwire/irq.h | 43 ++++++++++++++++++++++++++++++++ 5 files changed, 115 insertions(+), 33 deletions(-) create mode 100644 drivers/soundwire/irq.c create mode 100644 drivers/soundwire/irq.h diff --git a/drivers/soundwire/Makefile b/drivers/soundwire/Makefile index c3d3ab3262d3..657f5888a77b 100644 --- a/drivers/soundwire/Makefile +++ b/drivers/soundwire/Makefile @@ -15,6 +15,10 @@ ifdef CONFIG_DEBUG_FS soundwire-bus-y += debugfs.o endif +ifdef CONFIG_IRQ_DOMAIN +soundwire-bus-y += irq.o +endif + #AMD driver soundwire-amd-y := amd_manager.o obj-$(CONFIG_SOUNDWIRE_AMD) += soundwire-amd.o diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 1720031f35a3..0e7bc3c40f9d 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -3,13 +3,13 @@ #include #include -#include #include #include #include #include #include #include "bus.h" +#include "irq.h" #include "sysfs_local.h" static DEFINE_IDA(sdw_bus_ida); @@ -25,23 +25,6 @@ static int sdw_get_id(struct sdw_bus *bus) return 0; } -static int sdw_irq_map(struct irq_domain *h, unsigned int virq, - irq_hw_number_t hw) -{ - struct sdw_bus *bus = h->host_data; - - irq_set_chip_data(virq, bus); - irq_set_chip(virq, &bus->irq_chip); - irq_set_nested_thread(virq, 1); - irq_set_noprobe(virq); - - return 0; -} - -static const struct irq_domain_ops sdw_domain_ops = { - .map = sdw_irq_map, -}; - /** * sdw_bus_master_add() - add a bus Master instance * @bus: bus instance @@ -168,13 +151,9 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, bus->params.curr_bank = SDW_BANK0; bus->params.next_bank = SDW_BANK1; - bus->irq_chip.name = dev_name(bus->dev); - bus->domain = irq_domain_create_linear(fwnode, SDW_MAX_DEVICES, - &sdw_domain_ops, bus); - if (!bus->domain) { - dev_err(bus->dev, "Failed to add IRQ domain\n"); - return -EINVAL; - } + ret = sdw_irq_create(bus, fwnode); + if (ret) + return ret; return 0; } @@ -213,7 +192,7 @@ void sdw_bus_master_delete(struct sdw_bus *bus) { device_for_each_child(bus->dev, NULL, sdw_delete_slave); - irq_domain_remove(bus->domain); + sdw_irq_delete(bus); sdw_master_device_del(bus); diff --git a/drivers/soundwire/bus_type.c b/drivers/soundwire/bus_type.c index fafbc284e82d..9fa93bb923d7 100644 --- a/drivers/soundwire/bus_type.c +++ b/drivers/soundwire/bus_type.c @@ -7,6 +7,7 @@ #include #include #include "bus.h" +#include "irq.h" #include "sysfs_local.h" /** @@ -122,11 +123,8 @@ static int sdw_drv_probe(struct device *dev) if (drv->ops && drv->ops->read_prop) drv->ops->read_prop(slave); - if (slave->prop.use_domain_irq) { - slave->irq = irq_create_mapping(slave->bus->domain, slave->dev_num); - if (!slave->irq) - dev_warn(dev, "Failed to map IRQ\n"); - } + if (slave->prop.use_domain_irq) + sdw_irq_create_mapping(slave); /* init the sysfs as we have properties now */ ret = sdw_slave_sysfs_init(slave); @@ -176,8 +174,7 @@ static int sdw_drv_remove(struct device *dev) slave->probed = false; if (slave->prop.use_domain_irq) - irq_dispose_mapping(irq_find_mapping(slave->bus->domain, - slave->dev_num)); + sdw_irq_dispose_mapping(slave); mutex_unlock(&slave->sdw_dev_lock); diff --git a/drivers/soundwire/irq.c b/drivers/soundwire/irq.c new file mode 100644 index 000000000000..0c08cebb1235 --- /dev/null +++ b/drivers/soundwire/irq.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2023 Cirrus Logic, Inc. and +// Cirrus Logic International Semiconductor Ltd. + +#include +#include +#include +#include +#include +#include "irq.h" + +static int sdw_irq_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + struct sdw_bus *bus = h->host_data; + + irq_set_chip_data(virq, bus); + irq_set_chip(virq, &bus->irq_chip); + irq_set_nested_thread(virq, 1); + irq_set_noprobe(virq); + + return 0; +} + +static const struct irq_domain_ops sdw_domain_ops = { + .map = sdw_irq_map, +}; + +int sdw_irq_create(struct sdw_bus *bus, + struct fwnode_handle *fwnode) +{ + bus->irq_chip.name = dev_name(bus->dev); + + bus->domain = irq_domain_create_linear(fwnode, SDW_MAX_DEVICES, + &sdw_domain_ops, bus); + if (!bus->domain) { + dev_err(bus->dev, "Failed to add IRQ domain\n"); + return -EINVAL; + } + + return 0; +} + +void sdw_irq_delete(struct sdw_bus *bus) +{ + irq_domain_remove(bus->domain); +} + +void sdw_irq_create_mapping(struct sdw_slave *slave) +{ + slave->irq = irq_create_mapping(slave->bus->domain, slave->dev_num); + if (!slave->irq) + dev_warn(&slave->dev, "Failed to map IRQ\n"); +} + +void sdw_irq_dispose_mapping(struct sdw_slave *slave) +{ + irq_dispose_mapping(irq_find_mapping(slave->bus->domain, slave->dev_num)); +} diff --git a/drivers/soundwire/irq.h b/drivers/soundwire/irq.h new file mode 100644 index 000000000000..58a58046d92b --- /dev/null +++ b/drivers/soundwire/irq.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. + */ + +#ifndef __SDW_IRQ_H +#define __SDW_IRQ_H + +#include +#include + +#if IS_ENABLED(CONFIG_IRQ_DOMAIN) + +int sdw_irq_create(struct sdw_bus *bus, + struct fwnode_handle *fwnode); +void sdw_irq_delete(struct sdw_bus *bus); +void sdw_irq_create_mapping(struct sdw_slave *slave); +void sdw_irq_dispose_mapping(struct sdw_slave *slave); + +#else /* CONFIG_IRQ_DOMAIN */ + +static inline int sdw_irq_create(struct sdw_bus *bus, + struct fwnode_handle *fwnode) +{ + return 0; +} + +static inline void sdw_irq_delete(struct sdw_bus *bus) +{ +} + +static inline void sdw_irq_create_mapping(struct sdw_slave *slave) +{ +} + +static inline void sdw_irq_dispose_mapping(struct sdw_slave *slave) +{ +} + +#endif /* CONFIG_IRQ_DOMAIN */ + +#endif /* __SDW_IRQ_H */ -- cgit v1.2.3 From cd4aece493f99f95d41edcce32927d70a5dde923 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 20 Sep 2023 15:05:06 +0200 Subject: ACPI: EC: Add quirk for the HP Pavilion Gaming 15-dk1xxx Added GPE quirk entry for the HP Pavilion Gaming 15-dk1xxx. There is a quirk entry for 2 15-c..... laptops, this is for a new version which has 15-dk1xxx as identifier. This fixes the LID switch and rfkill and brightness hotkeys not working. Closes: https://github.com/systemd/systemd/issues/28942 Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 660834a49c1f..c95d0edb0be9 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1913,6 +1913,17 @@ static const struct dmi_system_id ec_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "HP 15-cx0041ur"), }, }, + { + /* + * HP Pavilion Gaming Laptop 15-dk1xxx + * https://github.com/systemd/systemd/issues/28942 + */ + .callback = ec_honor_dsdt_gpe, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion Gaming Laptop 15-dk1xxx"), + }, + }, { /* * Samsung hardware -- cgit v1.2.3 From 053d7dcd3440fa953ef4ca08de8e0a33d23d3b29 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:51:37 -0700 Subject: fbdev: mmp: Annotate struct mmphw_ctrl with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct mmphw_ctrl. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: Helge Deller --- drivers/video/fbdev/mmp/hw/mmp_ctrl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/mmp/hw/mmp_ctrl.h b/drivers/video/fbdev/mmp/hw/mmp_ctrl.h index 167585a889d3..719b99a9bc77 100644 --- a/drivers/video/fbdev/mmp/hw/mmp_ctrl.h +++ b/drivers/video/fbdev/mmp/hw/mmp_ctrl.h @@ -1406,7 +1406,7 @@ struct mmphw_ctrl { /*pathes*/ int path_num; - struct mmphw_path_plat path_plats[]; + struct mmphw_path_plat path_plats[] __counted_by(path_num); }; static inline int overlay_is_vid(struct mmp_overlay *overlay) -- cgit v1.2.3 From e34872523ca56b6a3ed7a5b06febdc66b4f16e3c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:51:41 -0700 Subject: fbdev: mmp: Annotate struct mmp_path with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct mmp_path. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Kees Cook Signed-off-by: Helge Deller --- include/video/mmp_disp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/video/mmp_disp.h b/include/video/mmp_disp.h index 77252cb46361..a722dcbf5073 100644 --- a/include/video/mmp_disp.h +++ b/include/video/mmp_disp.h @@ -231,7 +231,7 @@ struct mmp_path { /* layers */ int overlay_num; - struct mmp_overlay overlays[]; + struct mmp_overlay overlays[] __counted_by(overlay_num); }; extern struct mmp_path *mmp_get_path(const char *name); -- cgit v1.2.3 From c1a8d1d0edb71dec15c9649cb56866c71c1ecd9e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 21 Sep 2023 19:04:21 +0800 Subject: fbdev: atyfb: only use ioremap_uc() on i386 and ia64 ioremap_uc() is only meaningful on old x86-32 systems with the PAT extension, and on ia64 with its slightly unconventional ioremap() behavior, everywhere else this is the same as ioremap() anyway. Change the only driver that still references ioremap_uc() to only do so on x86-32/ia64 in order to allow removing that interface at some point in the future for the other architectures. On some architectures, ioremap_uc() just returns NULL, changing the driver to call ioremap() means that they now have a chance of working correctly. Signed-off-by: Arnd Bergmann Signed-off-by: Baoquan He Reviewed-by: Luis Chamberlain Cc: Helge Deller Cc: Thomas Zimmermann Cc: Christophe Leroy Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Helge Deller --- drivers/video/fbdev/aty/atyfb_base.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index 5c87817a4f4c..3dcf83f5e7b4 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -3440,11 +3440,15 @@ static int atyfb_setup_generic(struct pci_dev *pdev, struct fb_info *info, } info->fix.mmio_start = raddr; +#if defined(__i386__) || defined(__ia64__) /* * By using strong UC we force the MTRR to never have an * effect on the MMIO region on both non-PAT and PAT systems. */ par->ati_regbase = ioremap_uc(info->fix.mmio_start, 0x1000); +#else + par->ati_regbase = ioremap(info->fix.mmio_start, 0x1000); +#endif if (par->ati_regbase == NULL) return -ENOMEM; -- cgit v1.2.3 From f87ef5723536a6545ed9c43e18b13a9faceb3c80 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Fri, 1 Sep 2023 12:58:23 +0200 Subject: KVM: s390: fix gisa destroy operation might lead to cpu stalls A GISA cannot be destroyed as long it is linked in the GIB alert list as this would break the alert list. Just waiting for its removal from the list triggered by another vm is not sufficient as it might be the only vm. The below shown cpu stall situation might occur when GIB alerts are delayed and is fixed by calling process_gib_alert_list() instead of waiting. At this time the vcpus of the vm are already destroyed and thus no vcpu can be kicked to enter the SIE again if for some reason an interrupt is pending for that vm. Additionally the IAM restore value is set to 0x00. That would be a bug introduced by incomplete device de-registration, i.e. missing kvm_s390_gisc_unregister() call. Setting this value and the IAM in the GISA to 0x00 guarantees that late interrupts don't bring the GISA back into the alert list. CPU stall caused by kvm_s390_gisa_destroy(): [ 4915.311372] rcu: INFO: rcu_sched detected expedited stalls on CPUs/tasks: { 14-.... } 24533 jiffies s: 5269 root: 0x1/. [ 4915.311390] rcu: blocking rcu_node structures (internal RCU debug): l=1:0-15:0x4000/. [ 4915.311394] Task dump for CPU 14: [ 4915.311395] task:qemu-system-s39 state:R running task stack:0 pid:217198 ppid:1 flags:0x00000045 [ 4915.311399] Call Trace: [ 4915.311401] [<0000038003a33a10>] 0x38003a33a10 [ 4933.861321] rcu: INFO: rcu_sched self-detected stall on CPU [ 4933.861332] rcu: 14-....: (42008 ticks this GP) idle=53f4/1/0x4000000000000000 softirq=61530/61530 fqs=14031 [ 4933.861353] rcu: (t=42008 jiffies g=238109 q=100360 ncpus=18) [ 4933.861357] CPU: 14 PID: 217198 Comm: qemu-system-s39 Not tainted 6.5.0-20230816.rc6.git26.a9d17c5d8813.300.fc38.s390x #1 [ 4933.861360] Hardware name: IBM 8561 T01 703 (LPAR) [ 4933.861361] Krnl PSW : 0704e00180000000 000003ff804bfc66 (kvm_s390_gisa_destroy+0x3e/0xe0 [kvm]) [ 4933.861414] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 [ 4933.861416] Krnl GPRS: 0000000000000000 00000372000000fc 00000002134f8000 000000000d5f5900 [ 4933.861419] 00000002f5ea1d18 00000002f5ea1d18 0000000000000000 0000000000000000 [ 4933.861420] 00000002134fa890 00000002134f8958 000000000d5f5900 00000002134f8000 [ 4933.861422] 000003ffa06acf98 000003ffa06858b0 0000038003a33c20 0000038003a33bc8 [ 4933.861430] Krnl Code: 000003ff804bfc58: ec66002b007e cij %r6,0,6,000003ff804bfcae 000003ff804bfc5e: b904003a lgr %r3,%r10 #000003ff804bfc62: a7f40005 brc 15,000003ff804bfc6c >000003ff804bfc66: e330b7300204 lg %r3,10032(%r11) 000003ff804bfc6c: 58003000 l %r0,0(%r3) 000003ff804bfc70: ec03fffb6076 crj %r0,%r3,6,000003ff804bfc66 000003ff804bfc76: e320b7600271 lay %r2,10080(%r11) 000003ff804bfc7c: c0e5fffea339 brasl %r14,000003ff804942ee [ 4933.861444] Call Trace: [ 4933.861445] [<000003ff804bfc66>] kvm_s390_gisa_destroy+0x3e/0xe0 [kvm] [ 4933.861460] ([<00000002623523de>] free_unref_page+0xee/0x148) [ 4933.861507] [<000003ff804aea98>] kvm_arch_destroy_vm+0x50/0x120 [kvm] [ 4933.861521] [<000003ff8049d374>] kvm_destroy_vm+0x174/0x288 [kvm] [ 4933.861532] [<000003ff8049d4fe>] kvm_vm_release+0x36/0x48 [kvm] [ 4933.861542] [<00000002623cd04a>] __fput+0xea/0x2a8 [ 4933.861547] [<00000002620d5bf8>] task_work_run+0x88/0xf0 [ 4933.861551] [<00000002620b0aa6>] do_exit+0x2c6/0x528 [ 4933.861556] [<00000002620b0f00>] do_group_exit+0x40/0xb8 [ 4933.861557] [<00000002620b0fa6>] __s390x_sys_exit_group+0x2e/0x30 [ 4933.861559] [<0000000262d481f4>] __do_syscall+0x1d4/0x200 [ 4933.861563] [<0000000262d59028>] system_call+0x70/0x98 [ 4933.861565] Last Breaking-Event-Address: [ 4933.861566] [<0000038003a33b60>] 0x38003a33b60 Fixes: 9f30f6216378 ("KVM: s390: add gib_alert_irq_handler()") Signed-off-by: Michael Mueller Reviewed-by: Matthew Rosato Reviewed-by: Nico Boehr Link: https://lore.kernel.org/r/20230901105823.3973928-1-mimu@linux.ibm.com Message-ID: <20230901105823.3973928-1-mimu@linux.ibm.com> Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/interrupt.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index c1b47d608a2b..efaebba5ee19 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -303,11 +303,6 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi) return 0; } -static inline int gisa_in_alert_list(struct kvm_s390_gisa *gisa) -{ - return READ_ONCE(gisa->next_alert) != (u32)virt_to_phys(gisa); -} - static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) { set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); @@ -3216,11 +3211,12 @@ void kvm_s390_gisa_destroy(struct kvm *kvm) if (!gi->origin) return; - if (gi->alert.mask) - KVM_EVENT(3, "vm 0x%pK has unexpected iam 0x%02x", - kvm, gi->alert.mask); - while (gisa_in_alert_list(gi->origin)) - cpu_relax(); + WARN(gi->alert.mask != 0x00, + "unexpected non zero alert.mask 0x%02x", + gi->alert.mask); + gi->alert.mask = 0x00; + if (gisa_set_iam(gi->origin, gi->alert.mask)) + process_gib_alert_list(); hrtimer_cancel(&gi->timer); gi->origin = NULL; VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa); -- cgit v1.2.3 From b29a2acd36dd7a33c63f260df738fb96baa3d4f8 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Thu, 4 May 2023 14:00:42 +0200 Subject: KVM: x86/pmu: Truncate counter value to allowed width on write Performance counters are defined to have width less than 64 bits. The vPMU code maintains the counters in u64 variables but assumes the value to fit within the defined width. However, for Intel non-full-width counters (MSR_IA32_PERFCTRx) the value receieved from the guest is truncated to 32 bits and then sign-extended to full 64 bits. If a negative value is set, it's sign-extended to 64 bits, but then in kvm_pmu_incr_counter() it's incremented, truncated, and compared to the previous value for overflow detection. That previous value is not truncated, so it always evaluates bigger than the truncated new one, and a PMI is injected. If the PMI handler writes a negative counter value itself, the vCPU never quits the PMI loop. Turns out that Linux PMI handler actually does write the counter with the value just read with RDPMC, so when no full-width support is exposed via MSR_IA32_PERF_CAPABILITIES, and the guest initializes the counter to a negative value, it locks up. This has been observed in the field, for example, when the guest configures atop to use perfevents and runs two instances of it simultaneously. To address the problem, maintain the invariant that the counter value always fits in the defined bit width, by truncating the received value in the respective set_msr methods. For better readability, factor the out into a helper function, pmc_write_counter(), shared by vmx and svm parts. Fixes: 9cd803d496e7 ("KVM: x86: Update vPMCs when retiring instructions") Cc: stable@vger.kernel.org Signed-off-by: Roman Kagan Link: https://lore.kernel.org/all/20230504120042.785651-1-rkagan@amazon.de Tested-by: Like Xu [sean: tweak changelog, s/set/write in the helper] Signed-off-by: Sean Christopherson --- arch/x86/kvm/pmu.h | 6 ++++++ arch/x86/kvm/svm/pmu.c | 2 +- arch/x86/kvm/vmx/pmu_intel.c | 4 ++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 7d9ba301c090..1d64113de488 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -74,6 +74,12 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc) return counter & pmc_bitmask(pmc); } +static inline void pmc_write_counter(struct kvm_pmc *pmc, u64 val) +{ + pmc->counter += val - pmc_read_counter(pmc); + pmc->counter &= pmc_bitmask(pmc); +} + static inline void pmc_release_perf_event(struct kvm_pmc *pmc) { if (pmc->perf_event) { diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index cef5a3d0abd0..373ff6a6687b 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -160,7 +160,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* MSR_PERFCTRn */ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); if (pmc) { - pmc->counter += data - pmc_read_counter(pmc); + pmc_write_counter(pmc, data); pmc_update_sample_period(pmc); return 0; } diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index f2efa0bf7ae8..820d3e1f6b4f 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -436,11 +436,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated && !(msr & MSR_PMC_FULL_WIDTH_BIT)) data = (s64)(s32)data; - pmc->counter += data - pmc_read_counter(pmc); + pmc_write_counter(pmc, data); pmc_update_sample_period(pmc); break; } else if ((pmc = get_fixed_pmc(pmu, msr))) { - pmc->counter += data - pmc_read_counter(pmc); + pmc_write_counter(pmc, data); pmc_update_sample_period(pmc); break; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { -- cgit v1.2.3 From a16eb25b09c02a54c1c1b449d4b6cfa2cf3f013a Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 25 Sep 2023 17:34:47 +0000 Subject: KVM: x86: Mask LVTPC when handling a PMI Per the SDM, "When the local APIC handles a performance-monitoring counters interrupt, it automatically sets the mask flag in the LVT performance counter register." Add this behavior to KVM's local APIC emulation. Failure to mask the LVTPC entry results in spurious PMIs, e.g. when running Linux as a guest, PMI handlers that do a "late_ack" spew a large number of "dazed and confused" spurious NMI warnings. Fixes: f5132b01386b ("KVM: Expose a version 2 architectural PMU to a guests") Cc: stable@vger.kernel.org Signed-off-by: Jim Mattson Tested-by: Mingwei Zhang Signed-off-by: Mingwei Zhang Link: https://lore.kernel.org/r/20230925173448.3518223-3-mizhang@google.com [sean: massage changelog, correct Fixes] Signed-off-by: Sean Christopherson --- arch/x86/kvm/lapic.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index dcd60b39e794..3e977dbbf993 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2759,13 +2759,17 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) { u32 reg = kvm_lapic_get_reg(apic, lvt_type); int vector, mode, trig_mode; + int r; if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { vector = reg & APIC_VECTOR_MASK; mode = reg & APIC_MODE_MASK; trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; - return __apic_accept_irq(apic, mode, vector, 1, trig_mode, - NULL); + + r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL); + if (r && lvt_type == APIC_LVTPC) + kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED); + return r; } return 0; } -- cgit v1.2.3 From 73554b29bd70546c1a9efc9c160641ef1b849358 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 25 Sep 2023 17:34:46 +0000 Subject: KVM: x86/pmu: Synthesize at most one PMI per VM-exit When the irq_work callback, kvm_pmi_trigger_fn(), is invoked during a VM-exit that also invokes __kvm_perf_overflow() as a result of instruction emulation, kvm_pmu_deliver_pmi() will be called twice before the next VM-entry. Calling kvm_pmu_deliver_pmi() twice is unlikely to be problematic now that KVM sets the LVTPC mask bit when delivering a PMI. But using IRQ work to trigger the PMI is still broken, albeit very theoretically. E.g. if the self-IPI to trigger IRQ work is be delayed long enough for the vCPU to be migrated to a different pCPU, then it's possible for kvm_pmi_trigger_fn() to race with the kvm_pmu_deliver_pmi() from KVM_REQ_PMI and still generate two PMIs. KVM could set the mask bit using an atomic operation, but that'd just be piling on unnecessary code to workaround what is effectively a hack. The *only* reason KVM uses IRQ work is to ensure the PMI is treated as a wake event, e.g. if the vCPU just executed HLT. Remove the irq_work callback for synthesizing a PMI, and all of the logic for invoking it. Instead, to prevent a vcpu from leaving C0 with a PMI pending, add a check for KVM_REQ_PMI to kvm_vcpu_has_events(). Fixes: 9cd803d496e7 ("KVM: x86: Update vPMCs when retiring instructions") Signed-off-by: Jim Mattson Tested-by: Mingwei Zhang Tested-by: Dapeng Mi Signed-off-by: Mingwei Zhang Link: https://lore.kernel.org/r/20230925173448.3518223-2-mizhang@google.com [sean: massage changelog] Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/pmu.c | 27 +-------------------------- arch/x86/kvm/x86.c | 3 +++ 3 files changed, 4 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 17715cb8731d..70d139406bc8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -528,7 +528,6 @@ struct kvm_pmu { u64 raw_event_mask; struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED]; - struct irq_work irq_work; /* * Overlay the bitmap with a 64-bit atomic so that all bits can be diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index edb89b51b383..9ae07db6f0f6 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -93,14 +93,6 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops) #undef __KVM_X86_PMU_OP } -static void kvm_pmi_trigger_fn(struct irq_work *irq_work) -{ - struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work); - struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); - - kvm_pmu_deliver_pmi(vcpu); -} - static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); @@ -124,20 +116,7 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); } - if (!pmc->intr || skip_pmi) - return; - - /* - * Inject PMI. If vcpu was in a guest mode during NMI PMI - * can be ejected on a guest mode re-entry. Otherwise we can't - * be sure that vcpu wasn't executing hlt instruction at the - * time of vmexit and is not going to re-enter guest mode until - * woken up. So we should wake it, but this is impossible from - * NMI context. Do it from irq work instead. - */ - if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu)) - irq_work_queue(&pmc_to_pmu(pmc)->irq_work); - else + if (pmc->intr && !skip_pmi) kvm_make_request(KVM_REQ_PMI, pmc->vcpu); } @@ -675,9 +654,6 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu) void kvm_pmu_reset(struct kvm_vcpu *vcpu) { - struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); - - irq_work_sync(&pmu->irq_work); static_call(kvm_x86_pmu_reset)(vcpu); } @@ -687,7 +663,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu) memset(pmu, 0, sizeof(*pmu)); static_call(kvm_x86_pmu_init)(vcpu); - init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn); pmu->event_count = 0; pmu->need_cleanup = false; kvm_pmu_refresh(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9f18b06bbda6..42a4e8f5e89a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12843,6 +12843,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) return true; #endif + if (kvm_test_request(KVM_REQ_PMI, vcpu)) + return true; + if (kvm_arch_interrupt_allowed(vcpu) && (kvm_cpu_has_interrupt(vcpu) || kvm_guest_apic_has_interrupt(vcpu))) -- cgit v1.2.3 From 5abb5c3cd4b38ec32c38a852c83ea04255cecf25 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 25 Sep 2023 16:38:44 +0100 Subject: riscv: errata: andes: Makefile: Fix randconfig build issue Compile the andes errata with cflags set to " -mcmodel=medany" when CONFIG_RISCV_ALTERNATIVE_EARLY is enabled. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309111311.8tcq3KVc-lkp@intel.com/ Signed-off-by: Lad Prabhakar Link: https://lore.kernel.org/r/20230925153844.26820-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Palmer Dabbelt --- arch/riscv/errata/andes/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/errata/andes/Makefile b/arch/riscv/errata/andes/Makefile index 2d644e19caef..6278c389b801 100644 --- a/arch/riscv/errata/andes/Makefile +++ b/arch/riscv/errata/andes/Makefile @@ -1 +1,5 @@ +ifdef CONFIG_RISCV_ALTERNATIVE_EARLY +CFLAGS_errata.o := -mcmodel=medany +endif + obj-y += errata.o -- cgit v1.2.3 From 3c67c5236fbf7a58c1a26d57da4465ea5fb25537 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 21 Sep 2023 10:46:52 -0400 Subject: dmaengine: fsl-dma: fix DMA error when enabling sg if 'DONE' bit is set In eDMAv3, clearing 'DONE' bit (bit 30) of CHn_CSR is required when enabling scatter-gather (SG). eDMAv4 does not require this change. Cc: stable@vger.kernel.org Fixes: 72f5801a4e2b ("dmaengine: fsl-edma: integrate v3 support") Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20230921144652.3259813-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-common.c | 15 ++++++++++++++- drivers/dma/fsl-edma-common.h | 14 +++++++++++++- drivers/dma/fsl-edma-main.c | 2 +- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c index a0f5741abcc4..50203b82f9f5 100644 --- a/drivers/dma/fsl-edma-common.c +++ b/drivers/dma/fsl-edma-common.c @@ -448,12 +448,25 @@ static void fsl_edma_set_tcd_regs(struct fsl_edma_chan *fsl_chan, edma_write_tcdreg(fsl_chan, tcd->dlast_sga, dlast_sga); + csr = le16_to_cpu(tcd->csr); + if (fsl_chan->is_sw) { - csr = le16_to_cpu(tcd->csr); csr |= EDMA_TCD_CSR_START; tcd->csr = cpu_to_le16(csr); } + /* + * Must clear CHn_CSR[DONE] bit before enable TCDn_CSR[ESG] at EDMAv3 + * eDMAv4 have not such requirement. + * Change MLINK need clear CHn_CSR[DONE] for both eDMAv3 and eDMAv4. + */ + if (((fsl_edma_drvflags(fsl_chan) & FSL_EDMA_DRV_CLEAR_DONE_E_SG) && + (csr & EDMA_TCD_CSR_E_SG)) || + ((fsl_edma_drvflags(fsl_chan) & FSL_EDMA_DRV_CLEAR_DONE_E_LINK) && + (csr & EDMA_TCD_CSR_E_LINK))) + edma_writel_chreg(fsl_chan, edma_readl_chreg(fsl_chan, ch_csr), ch_csr); + + edma_write_tcdreg(fsl_chan, tcd->csr, csr); } diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h index 3cc0cc8fc2d0..40d50cc3d75a 100644 --- a/drivers/dma/fsl-edma-common.h +++ b/drivers/dma/fsl-edma-common.h @@ -183,11 +183,23 @@ struct fsl_edma_desc { #define FSL_EDMA_DRV_BUS_8BYTE BIT(10) #define FSL_EDMA_DRV_DEV_TO_DEV BIT(11) #define FSL_EDMA_DRV_ALIGN_64BYTE BIT(12) +/* Need clean CHn_CSR DONE before enable TCD's ESG */ +#define FSL_EDMA_DRV_CLEAR_DONE_E_SG BIT(13) +/* Need clean CHn_CSR DONE before enable TCD's MAJORELINK */ +#define FSL_EDMA_DRV_CLEAR_DONE_E_LINK BIT(14) #define FSL_EDMA_DRV_EDMA3 (FSL_EDMA_DRV_SPLIT_REG | \ FSL_EDMA_DRV_BUS_8BYTE | \ FSL_EDMA_DRV_DEV_TO_DEV | \ - FSL_EDMA_DRV_ALIGN_64BYTE) + FSL_EDMA_DRV_ALIGN_64BYTE | \ + FSL_EDMA_DRV_CLEAR_DONE_E_SG | \ + FSL_EDMA_DRV_CLEAR_DONE_E_LINK) + +#define FSL_EDMA_DRV_EDMA4 (FSL_EDMA_DRV_SPLIT_REG | \ + FSL_EDMA_DRV_BUS_8BYTE | \ + FSL_EDMA_DRV_DEV_TO_DEV | \ + FSL_EDMA_DRV_ALIGN_64BYTE | \ + FSL_EDMA_DRV_CLEAR_DONE_E_LINK) struct fsl_edma_drvdata { u32 dmamuxs; /* only used before v3 */ diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 63d48d046f04..621a460fae0c 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -355,7 +355,7 @@ static struct fsl_edma_drvdata imx93_data3 = { }; static struct fsl_edma_drvdata imx93_data4 = { - .flags = FSL_EDMA_DRV_HAS_CHMUX | FSL_EDMA_DRV_HAS_DMACLK | FSL_EDMA_DRV_EDMA3, + .flags = FSL_EDMA_DRV_HAS_CHMUX | FSL_EDMA_DRV_HAS_DMACLK | FSL_EDMA_DRV_EDMA4, .chreg_space_sz = 0x8000, .chreg_off = 0x10000, .setup_irq = fsl_edma3_irq_init, -- cgit v1.2.3 From 54a5aff6f98b69e73cba40470f103a72bd436b20 Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Wed, 20 Sep 2023 18:55:26 +0530 Subject: dt-bindings: dmaengine: zynqmp_dma: add xlnx,bus-width required property xlnx,bus-width is a required property. In yaml conversion somehow it got missed out. Bring it back and mention it in required list. Also add Harini and myself to the maintainer list. Fixes: 5a04982df8da ("dt-bindings: dmaengine: zynqmp_dma: convert to yaml") Signed-off-by: Radhey Shyam Pandey Acked-by: Krzysztof Kozlowski Reviewed-by: Michael Tretter Reviewed-by: Peter Korsgaard Link: https://lore.kernel.org/r/1695216326-3841352-1-git-send-email-radhey.shyam.pandey@amd.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml b/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml index 23ada8f87526..769ce23aaac2 100644 --- a/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml +++ b/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml @@ -13,6 +13,8 @@ description: | maintainers: - Michael Tretter + - Harini Katakam + - Radhey Shyam Pandey allOf: - $ref: ../dma-controller.yaml# @@ -65,6 +67,7 @@ required: - interrupts - clocks - clock-names + - xlnx,bus-width additionalProperties: false -- cgit v1.2.3 From 3f4b82167a3b1f4ddb33d890f758a042ef4ceef1 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 23 Aug 2023 14:26:35 -0400 Subject: dmaengine: fsl-edma: fix edma4 channel enable failure on second attempt When attempting to start DMA for the second time using fsl_edma3_enable_request(), channel never start. CHn_MUX must have a unique value when selecting a peripheral slot in the channel mux configuration. The only value that may overlap is source 0. If there is an attempt to write a mux configuration value that is already consumed by another channel, a mux configuration of 0 (SRC = 0) will be written. Check CHn_MUX before writing in fsl_edma3_enable_request(). Fixes: 72f5801a4e2b ("dmaengine: fsl-edma: integrate v3 support") Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20230823182635.2618118-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-common.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c index 50203b82f9f5..6a3abe5b1790 100644 --- a/drivers/dma/fsl-edma-common.c +++ b/drivers/dma/fsl-edma-common.c @@ -92,8 +92,14 @@ static void fsl_edma3_enable_request(struct fsl_edma_chan *fsl_chan) edma_writel_chreg(fsl_chan, val, ch_sbr); - if (flags & FSL_EDMA_DRV_HAS_CHMUX) - edma_writel_chreg(fsl_chan, fsl_chan->srcid, ch_mux); + if (flags & FSL_EDMA_DRV_HAS_CHMUX) { + /* + * ch_mux: With the exception of 0, attempts to write a value + * already in use will be forced to 0. + */ + if (!edma_readl_chreg(fsl_chan, ch_mux)) + edma_writel_chreg(fsl_chan, fsl_chan->srcid, ch_mux); + } val = edma_readl_chreg(fsl_chan, ch_csr); val |= EDMA_V3_CH_CSR_ERQ; -- cgit v1.2.3 From 87d1888aa40f25773fa0b948bcb2545f97e2cb15 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 15:52:19 +0400 Subject: fs/ntfs3: Add ckeck in ni_update_parent() Check simple case when parent inode equals current inode. Signed-off-by: Konstantin Komarov --- fs/ntfs3/frecord.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 2b85cb10f0be..d49fbb22bd5e 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -3208,6 +3208,12 @@ static bool ni_update_parent(struct ntfs_inode *ni, struct NTFS_DUP_INFO *dup, if (!fname || !memcmp(&fname->dup, dup, sizeof(fname->dup))) continue; + /* Check simple case when parent inode equals current inode. */ + if (ino_get(&fname->home) == ni->vfs_inode.i_ino) { + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + continue; + } + /* ntfs_iget5 may sleep. */ dir = ntfs_iget5(sb, &fname->home, NULL); if (IS_ERR(dir)) { -- cgit v1.2.3 From 06ccfb00645990a9fcc14249e6d1c25921ecb836 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 15:57:19 +0400 Subject: fs/ntfs3: Write immediately updated ntfs state Signed-off-by: Konstantin Komarov --- fs/ntfs3/fsntfs.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 33afee0f5559..edb51dc12f65 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -983,18 +983,11 @@ out: if (err) return err; - mark_inode_dirty(&ni->vfs_inode); + mark_inode_dirty_sync(&ni->vfs_inode); /* verify(!ntfs_update_mftmirr()); */ - /* - * If we used wait=1, sync_inode_metadata waits for the io for the - * inode to finish. It hangs when media is removed. - * So wait=0 is sent down to sync_inode_metadata - * and filemap_fdatawrite is used for the data blocks. - */ - err = sync_inode_metadata(&ni->vfs_inode, 0); - if (!err) - err = filemap_fdatawrite(ni->vfs_inode.i_mapping); + /* write mft record on disk. */ + err = _ni_write_inode(&ni->vfs_inode, 1); return err; } -- cgit v1.2.3 From fc471e39e38fea6677017cbdd6d928088a59fc67 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 16:12:58 +0400 Subject: fs/ntfs3: Use kvmalloc instead of kmalloc(... __GFP_NOWARN) Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrlist.c | 15 +++++++++++++-- fs/ntfs3/bitmap.c | 3 ++- fs/ntfs3/super.c | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index 42631b31adf1..7c01735d1219 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -52,7 +52,8 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) if (!attr->non_res) { lsize = le32_to_cpu(attr->res.data_size); - le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN); + /* attr is resident: lsize < record_size (1K or 4K) */ + le = kvmalloc(al_aligned(lsize), GFP_KERNEL); if (!le) { err = -ENOMEM; goto out; @@ -80,7 +81,17 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) if (err < 0) goto out; - le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN); + /* attr is nonresident. + * The worst case: + * 1T (2^40) extremely fragmented file. + * cluster = 4K (2^12) => 2^28 fragments + * 2^9 fragments per one record => 2^19 records + * 2^5 bytes of ATTR_LIST_ENTRY per one record => 2^24 bytes. + * + * the result is 16M bytes per attribute list. + * Use kvmalloc to allocate in range [several Kbytes - dozen Mbytes] + */ + le = kvmalloc(al_aligned(lsize), GFP_KERNEL); if (!le) { err = -ENOMEM; goto out; diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c index 107e808e06ea..d66055e30aff 100644 --- a/fs/ntfs3/bitmap.c +++ b/fs/ntfs3/bitmap.c @@ -659,7 +659,8 @@ int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits) wnd->bits_last = wbits; wnd->free_bits = - kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS | __GFP_NOWARN); + kvmalloc_array(wnd->nwnd, sizeof(u16), GFP_KERNEL | __GFP_ZERO); + if (!wnd->free_bits) return -ENOMEM; diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index cfec5e0c7f66..ea4e218c3f75 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -1367,7 +1367,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) } bytes = inode->i_size; - sbi->def_table = t = kmalloc(bytes, GFP_NOFS | __GFP_NOWARN); + sbi->def_table = t = kvmalloc(bytes, GFP_KERNEL); if (!t) { err = -ENOMEM; goto put_inode_out; -- cgit v1.2.3 From 013ff63b649475f0ee134e2c8d0c8e65284ede50 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 16:17:02 +0400 Subject: fs/ntfs3: Add more attributes checks in mi_enum_attr() Signed-off-by: Konstantin Komarov --- fs/ntfs3/record.c | 68 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index c12ebffc94da..02cc91ed8835 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -193,8 +193,9 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) { const struct MFT_REC *rec = mi->mrec; u32 used = le32_to_cpu(rec->used); - u32 t32, off, asize; + u32 t32, off, asize, prev_type; u16 t16; + u64 data_size, alloc_size, tot_size; if (!attr) { u32 total = le32_to_cpu(rec->total); @@ -213,6 +214,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) if (!is_rec_inuse(rec)) return NULL; + prev_type = 0; attr = Add2Ptr(rec, off); } else { /* Check if input attr inside record. */ @@ -226,11 +228,11 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) return NULL; } - if (off + asize < off) { - /* Overflow check. */ + /* Overflow check. */ + if (off + asize < off) return NULL; - } + prev_type = le32_to_cpu(attr->type); attr = Add2Ptr(attr, asize); off += asize; } @@ -250,7 +252,11 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) /* 0x100 is last known attribute for now. */ t32 = le32_to_cpu(attr->type); - if ((t32 & 0xf) || (t32 > 0x100)) + if (!t32 || (t32 & 0xf) || (t32 > 0x100)) + return NULL; + + /* attributes in record must be ordered by type */ + if (t32 < prev_type) return NULL; /* Check overflow and boundary. */ @@ -259,16 +265,15 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) /* Check size of attribute. */ if (!attr->non_res) { + /* Check resident fields. */ if (asize < SIZEOF_RESIDENT) return NULL; t16 = le16_to_cpu(attr->res.data_off); - if (t16 > asize) return NULL; - t32 = le32_to_cpu(attr->res.data_size); - if (t16 + t32 > asize) + if (t16 + le32_to_cpu(attr->res.data_size) > asize) return NULL; t32 = sizeof(short) * attr->name_len; @@ -278,21 +283,52 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) return attr; } - /* Check some nonresident fields. */ - if (attr->name_len && - le16_to_cpu(attr->name_off) + sizeof(short) * attr->name_len > - le16_to_cpu(attr->nres.run_off)) { + /* Check nonresident fields. */ + if (attr->non_res != 1) + return NULL; + + t16 = le16_to_cpu(attr->nres.run_off); + if (t16 > asize) + return NULL; + + t32 = sizeof(short) * attr->name_len; + if (t32 && le16_to_cpu(attr->name_off) + t32 > t16) + return NULL; + + /* Check start/end vcn. */ + if (le64_to_cpu(attr->nres.svcn) > le64_to_cpu(attr->nres.evcn) + 1) + return NULL; + + data_size = le64_to_cpu(attr->nres.data_size); + if (le64_to_cpu(attr->nres.valid_size) > data_size) return NULL; - } - if (attr->nres.svcn || !is_attr_ext(attr)) { + alloc_size = le64_to_cpu(attr->nres.alloc_size); + if (data_size > alloc_size) + return NULL; + + t32 = mi->sbi->cluster_mask; + if (alloc_size & t32) + return NULL; + + if (!attr->nres.svcn && is_attr_ext(attr)) { + /* First segment of sparse/compressed attribute */ + if (asize + 8 < SIZEOF_NONRESIDENT_EX) + return NULL; + + tot_size = le64_to_cpu(attr->nres.total_size); + if (tot_size & t32) + return NULL; + + if (tot_size > alloc_size) + return NULL; + } else { if (asize + 8 < SIZEOF_NONRESIDENT) return NULL; if (attr->nres.c_unit) return NULL; - } else if (asize + 8 < SIZEOF_NONRESIDENT_EX) - return NULL; + } return attr; } -- cgit v1.2.3 From bfbe5b31caa74ab97f1784fe9ade5f45e0d3de91 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 16:22:53 +0400 Subject: fs/ntfs3: fix deadlock in mark_as_free_ex Reported-by: syzbot+e94d98936a0ed08bde43@syzkaller.appspotmail.com Signed-off-by: Konstantin Komarov --- fs/ntfs3/fsntfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index edb51dc12f65..fbfe21dbb425 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -2454,10 +2454,12 @@ void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim) { CLST end, i, zone_len, zlen; struct wnd_bitmap *wnd = &sbi->used.bitmap; + bool dirty = false; down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); if (!wnd_is_used(wnd, lcn, len)) { - ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + /* mark volume as dirty out of wnd->rw_lock */ + dirty = true; end = lcn + len; len = 0; @@ -2511,6 +2513,8 @@ void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim) out: up_write(&wnd->rw_lock); + if (dirty) + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); } /* -- cgit v1.2.3 From 91a4b1ee78cb100b19b70f077c247f211110348f Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 16:25:25 +0400 Subject: fs/ntfs3: Fix shift-out-of-bounds in ntfs_fill_super Reported-by: syzbot+478c1bf0e6bf4a8f3a04@syzkaller.appspotmail.com Signed-off-by: Konstantin Komarov --- fs/ntfs3/ntfs_fs.h | 2 ++ fs/ntfs3/super.c | 26 ++++++++++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 629403ede6e5..788567d71d93 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -42,9 +42,11 @@ enum utf16_endian; #define MINUS_ONE_T ((size_t)(-1)) /* Biggest MFT / smallest cluster */ #define MAXIMUM_BYTES_PER_MFT 4096 +#define MAXIMUM_SHIFT_BYTES_PER_MFT 12 #define NTFS_BLOCKS_PER_MFT_RECORD (MAXIMUM_BYTES_PER_MFT / 512) #define MAXIMUM_BYTES_PER_INDEX 4096 +#define MAXIMUM_SHIFT_BYTES_PER_INDEX 12 #define NTFS_BLOCKS_PER_INODE (MAXIMUM_BYTES_PER_INDEX / 512) /* NTFS specific error code when fixup failed. */ diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index ea4e218c3f75..f78c67452b2a 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -899,9 +899,17 @@ check_boot: goto out; } - sbi->record_size = record_size = - boot->record_size < 0 ? 1 << (-boot->record_size) : - (u32)boot->record_size << cluster_bits; + if (boot->record_size >= 0) { + record_size = (u32)boot->record_size << cluster_bits; + } else if (-boot->record_size <= MAXIMUM_SHIFT_BYTES_PER_MFT) { + record_size = 1u << (-boot->record_size); + } else { + ntfs_err(sb, "%s: invalid record size %d.", hint, + boot->record_size); + goto out; + } + + sbi->record_size = record_size; sbi->record_bits = blksize_bits(record_size); sbi->attr_size_tr = (5 * record_size >> 4); // ~320 bytes @@ -918,9 +926,15 @@ check_boot: goto out; } - sbi->index_size = boot->index_size < 0 ? - 1u << (-boot->index_size) : - (u32)boot->index_size << cluster_bits; + if (boot->index_size >= 0) { + sbi->index_size = (u32)boot->index_size << cluster_bits; + } else if (-boot->index_size <= MAXIMUM_SHIFT_BYTES_PER_INDEX) { + sbi->index_size = 1u << (-boot->index_size); + } else { + ntfs_err(sb, "%s: invalid index size %d.", hint, + boot->index_size); + goto out; + } /* Check index record size. */ if (sbi->index_size < SECTOR_SIZE || !is_power_of_2(sbi->index_size)) { -- cgit v1.2.3 From 3f2f09f18972dc4548feebd9946c10c04351333d Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 22 Sep 2023 13:01:04 +0300 Subject: fs/ntfs3: Use inode_set_ctime_to_ts instead of inode_set_ctime Signed-off-by: Konstantin Komarov --- fs/ntfs3/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index eb2ed0701495..2f76dc055c1f 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -170,8 +170,8 @@ next_attr: nt2kernel(std5->cr_time, &ni->i_crtime); #endif nt2kernel(std5->a_time, &inode->i_atime); - ctime = inode_get_ctime(inode); nt2kernel(std5->c_time, &ctime); + inode_set_ctime_to_ts(inode, ctime); nt2kernel(std5->m_time, &inode->i_mtime); ni->std_fa = std5->fa; -- cgit v1.2.3 From 4ad5c924df6cd6d85708fa23f9d9a2b78a2e428e Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 22 Sep 2023 13:07:59 +0300 Subject: fs/ntfs3: Allow repeated call to ntfs3_put_sbi Signed-off-by: Konstantin Komarov --- fs/ntfs3/bitmap.c | 1 + fs/ntfs3/super.c | 21 ++++++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c index d66055e30aff..63f14a0232f6 100644 --- a/fs/ntfs3/bitmap.c +++ b/fs/ntfs3/bitmap.c @@ -125,6 +125,7 @@ void wnd_close(struct wnd_bitmap *wnd) struct rb_node *node, *next; kfree(wnd->free_bits); + wnd->free_bits = NULL; run_close(&wnd->run); node = rb_first(&wnd->start_tree); diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index f78c67452b2a..71c80c578feb 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -576,20 +576,30 @@ static noinline void ntfs3_put_sbi(struct ntfs_sb_info *sbi) wnd_close(&sbi->mft.bitmap); wnd_close(&sbi->used.bitmap); - if (sbi->mft.ni) + if (sbi->mft.ni) { iput(&sbi->mft.ni->vfs_inode); + sbi->mft.ni = NULL; + } - if (sbi->security.ni) + if (sbi->security.ni) { iput(&sbi->security.ni->vfs_inode); + sbi->security.ni = NULL; + } - if (sbi->reparse.ni) + if (sbi->reparse.ni) { iput(&sbi->reparse.ni->vfs_inode); + sbi->reparse.ni = NULL; + } - if (sbi->objid.ni) + if (sbi->objid.ni) { iput(&sbi->objid.ni->vfs_inode); + sbi->objid.ni = NULL; + } - if (sbi->volume.ni) + if (sbi->volume.ni) { iput(&sbi->volume.ni->vfs_inode); + sbi->volume.ni = NULL; + } ntfs_update_mftmirr(sbi, 0); @@ -1577,6 +1587,7 @@ put_inode_out: iput(inode); out: kfree(boot2); + ntfs3_put_sbi(sbi); return err; } -- cgit v1.2.3 From dcc852e509a4cba0ac6ac734077cef260e4e0fe6 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 22 Sep 2023 13:12:11 +0300 Subject: fs/ntfs3: Fix alternative boot searching Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 71c80c578feb..d2951b23f52a 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -846,7 +846,7 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, struct ntfs_sb_info *sbi = sb->s_fs_info; int err; u32 mb, gb, boot_sector_size, sct_per_clst, record_size; - u64 sectors, clusters, mlcn, mlcn2; + u64 sectors, clusters, mlcn, mlcn2, dev_size0; struct NTFS_BOOT *boot; struct buffer_head *bh; struct MFT_REC *rec; @@ -855,6 +855,9 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, u32 boot_off = 0; const char *hint = "Primary boot"; + /* Save original dev_size. Used with alternative boot. */ + dev_size0 = dev_size; + sbi->volume.blocks = dev_size >> PAGE_SHIFT; bh = ntfs_bread(sb, 0); @@ -1087,9 +1090,9 @@ check_boot: } out: - if (err == -EINVAL && !bh->b_blocknr && dev_size > PAGE_SHIFT) { + if (err == -EINVAL && !bh->b_blocknr && dev_size0 > PAGE_SHIFT) { u32 block_size = min_t(u32, sector_size, PAGE_SIZE); - u64 lbo = dev_size - sizeof(*boot); + u64 lbo = dev_size0 - sizeof(*boot); /* * Try alternative boot (last sector) @@ -1103,6 +1106,7 @@ out: boot_off = lbo & (block_size - 1); hint = "Alternative boot"; + dev_size = dev_size0; /* restore original size. */ goto check_boot; } brelse(bh); -- cgit v1.2.3 From f684073c09fd4658faa0a75c12b65d89a58b8e83 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 25 Sep 2023 10:47:07 +0300 Subject: fs/ntfs3: Refactoring and comments Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrib.c | 6 +++--- fs/ntfs3/file.c | 4 ++-- fs/ntfs3/inode.c | 3 ++- fs/ntfs3/namei.c | 6 +++--- fs/ntfs3/ntfs.h | 2 +- fs/ntfs3/ntfs_fs.h | 2 -- fs/ntfs3/record.c | 6 ++++++ fs/ntfs3/super.c | 19 ++++++++----------- 8 files changed, 25 insertions(+), 23 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index a9d82bbb4729..e16487764282 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -1106,10 +1106,10 @@ repack: } } - /* + /* * The code below may require additional cluster (to extend attribute list) - * and / or one MFT record - * It is too complex to undo operations if -ENOSPC occurs deep inside + * and / or one MFT record + * It is too complex to undo operations if -ENOSPC occurs deep inside * in 'ni_insert_nonresident'. * Return in advance -ENOSPC here if there are no free cluster and no free MFT. */ diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 962f12ce6c0a..1f7a194983c5 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -745,8 +745,8 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) } static ssize_t ntfs_file_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, - size_t len, unsigned int flags) + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) { struct inode *inode = in->f_mapping->host; struct ntfs_inode *ni = ntfs_i(inode); diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 2f76dc055c1f..d6d021e19aaa 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -1660,7 +1660,8 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, d_instantiate(dentry, inode); /* Set original time. inode times (i_ctime) may be changed in ntfs_init_acl. */ - inode->i_atime = inode->i_mtime = inode_set_ctime_to_ts(inode, ni->i_crtime); + inode->i_atime = inode->i_mtime = + inode_set_ctime_to_ts(inode, ni->i_crtime); dir->i_mtime = inode_set_ctime_to_ts(dir, ni->i_crtime); mark_inode_dirty(dir); diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c index ad430d50bd79..eedacf94edd8 100644 --- a/fs/ntfs3/namei.c +++ b/fs/ntfs3/namei.c @@ -156,8 +156,8 @@ static int ntfs_link(struct dentry *ode, struct inode *dir, struct dentry *de) err = ntfs_link_inode(inode, de); if (!err) { - dir->i_mtime = inode_set_ctime_to_ts(inode, - inode_set_ctime_current(dir)); + dir->i_mtime = inode_set_ctime_to_ts( + inode, inode_set_ctime_current(dir)); mark_inode_dirty(inode); mark_inode_dirty(dir); d_instantiate(de, inode); @@ -373,7 +373,7 @@ static int ntfs_atomic_open(struct inode *dir, struct dentry *dentry, #ifdef CONFIG_NTFS3_FS_POSIX_ACL if (IS_POSIXACL(dir)) { - /* + /* * Load in cache current acl to avoid ni_lock(dir): * ntfs_create_inode -> ntfs_init_acl -> posix_acl_create -> * ntfs_get_acl -> ntfs_get_acl_ex -> ni_lock diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 98b76d1b09e7..86aecbb01a92 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -847,7 +847,7 @@ struct OBJECT_ID { // Birth Volume Id is the Object Id of the Volume on. // which the Object Id was allocated. It never changes. struct GUID BirthVolumeId; //0x10: - + // Birth Object Id is the first Object Id that was // ever assigned to this MFT Record. I.e. If the Object Id // is changed for some reason, this field will reflect the diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 788567d71d93..0e6a2777870c 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -497,8 +497,6 @@ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, u32 flags); int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); -void ntfs_sparse_cluster(struct inode *inode, struct page *page0, CLST vcn, - CLST len); int ntfs_file_open(struct inode *inode, struct file *file); int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index 02cc91ed8835..53629b1f65e9 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -189,6 +189,12 @@ out: return err; } +/* + * mi_enum_attr - start/continue attributes enumeration in record. + * + * NOTE: mi->mrec - memory of size sbi->record_size + * here we sure that mi->mrec->total == sbi->record_size (see mi_read) + */ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) { const struct MFT_REC *rec = mi->mrec; diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index d2951b23f52a..f9a214367113 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -488,7 +488,6 @@ static ssize_t ntfs3_label_write(struct file *file, const char __user *buffer, { int err; struct super_block *sb = pde_data(file_inode(file)); - struct ntfs_sb_info *sbi = sb->s_fs_info; ssize_t ret = count; u8 *label = kmalloc(count, GFP_NOFS); @@ -502,7 +501,7 @@ static ssize_t ntfs3_label_write(struct file *file, const char __user *buffer, while (ret > 0 && label[ret - 1] == '\n') ret -= 1; - err = ntfs_set_label(sbi, label, ret); + err = ntfs_set_label(sb->s_fs_info, label, ret); if (err < 0) { ntfs_err(sb, "failed (%d) to write label", err); @@ -1082,10 +1081,10 @@ check_boot: if (bh->b_blocknr && !sb_rdonly(sb)) { /* - * Alternative boot is ok but primary is not ok. - * Do not update primary boot here 'cause it may be faked boot. - * Let ntfs to be mounted and update boot later. - */ + * Alternative boot is ok but primary is not ok. + * Do not update primary boot here 'cause it may be faked boot. + * Let ntfs to be mounted and update boot later. + */ *boot2 = kmemdup(boot, sizeof(*boot), GFP_NOFS | __GFP_NOWARN); } @@ -1549,9 +1548,9 @@ load_root: if (boot2) { /* - * Alternative boot is ok but primary is not ok. - * Volume is recognized as NTFS. Update primary boot. - */ + * Alternative boot is ok but primary is not ok. + * Volume is recognized as NTFS. Update primary boot. + */ struct buffer_head *bh0 = sb_getblk(sb, 0); if (bh0) { if (buffer_locked(bh0)) @@ -1785,7 +1784,6 @@ static int __init init_ntfs_fs(void) if (IS_ENABLED(CONFIG_NTFS3_LZX_XPRESS)) pr_info("ntfs3: Read-only LZX/Xpress compression included\n"); - #ifdef CONFIG_PROC_FS /* Create "/proc/fs/ntfs3" */ proc_info_root = proc_mkdir("fs/ntfs3", NULL); @@ -1827,7 +1825,6 @@ static void __exit exit_ntfs_fs(void) if (proc_info_root) remove_proc_entry("fs/ntfs3", NULL); #endif - } MODULE_LICENSE("GPL"); -- cgit v1.2.3 From d27e202b9ac416e52093edf8789614d93dbd6231 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 25 Sep 2023 10:54:07 +0300 Subject: fs/ntfs3: Add more info into /proc/fs/ntfs3//volinfo Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index f9a214367113..5811da7e9d45 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -453,15 +453,23 @@ static struct proc_dir_entry *proc_info_root; * ntfs3.1 * cluster size * number of clusters + * total number of mft records + * number of used mft records ~= number of files + folders + * real state of ntfs "dirty"/"clean" + * current state of ntfs "dirty"/"clean" */ static int ntfs3_volinfo(struct seq_file *m, void *o) { struct super_block *sb = m->private; struct ntfs_sb_info *sbi = sb->s_fs_info; - seq_printf(m, "ntfs%d.%d\n%u\n%zu\n", sbi->volume.major_ver, - sbi->volume.minor_ver, sbi->cluster_size, - sbi->used.bitmap.nbits); + seq_printf(m, "ntfs%d.%d\n%u\n%zu\n\%zu\n%zu\n%s\n%s\n", + sbi->volume.major_ver, sbi->volume.minor_ver, + sbi->cluster_size, sbi->used.bitmap.nbits, + sbi->mft.bitmap.nbits, + sbi->mft.bitmap.nbits - wnd_zeroes(&sbi->mft.bitmap), + sbi->volume.real_dirty ? "dirty" : "clean", + (sbi->volume.flags & VOLUME_FLAG_DIRTY) ? "dirty" : "clean"); return 0; } -- cgit v1.2.3 From e52dce610a2d53bf2b5e94a8843c71cb73a91ea5 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 25 Sep 2023 10:56:15 +0300 Subject: fs/ntfs3: Do not allow to change label if volume is read-only Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 5811da7e9d45..cf0a720523f0 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -497,7 +497,12 @@ static ssize_t ntfs3_label_write(struct file *file, const char __user *buffer, int err; struct super_block *sb = pde_data(file_inode(file)); ssize_t ret = count; - u8 *label = kmalloc(count, GFP_NOFS); + u8 *label; + + if (sb_rdonly(sb)) + return -EROFS; + + label = kmalloc(count, GFP_NOFS); if (!label) return -ENOMEM; -- cgit v1.2.3 From 32e9212256b88f35466642f9c939bb40cfb2c2de Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Tue, 26 Sep 2023 11:19:08 +0300 Subject: fs/ntfs3: Fix possible NULL-ptr-deref in ni_readpage_cmpr() Signed-off-by: Konstantin Komarov --- fs/ntfs3/frecord.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index d49fbb22bd5e..dad976a68985 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -2148,7 +2148,7 @@ out1: for (i = 0; i < pages_per_frame; i++) { pg = pages[i]; - if (i == idx) + if (i == idx || !pg) continue; unlock_page(pg); put_page(pg); -- cgit v1.2.3 From 9c689c8dc86f8ca99bf91c05f24c8bab38fe7d5f Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Tue, 26 Sep 2023 11:28:11 +0300 Subject: fs/ntfs3: Fix NULL pointer dereference on error in attr_allocate_frame() Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrib.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index e16487764282..63f70259edc0 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -1736,10 +1736,8 @@ repack: le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); - if (!attr_b) { - err = -ENOENT; - goto out; - } + if (!attr_b) + return -ENOENT; attr = attr_b; le = le_b; -- cgit v1.2.3 From 8e7e27b2ee1e19c4040d4987e345f678a74c0aed Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Thu, 20 Apr 2023 15:46:22 +0800 Subject: fs/ntfs3: fix panic about slab-out-of-bounds caused by ntfs_list_ea() Here is a BUG report about linux-6.1 from syzbot, but it still remains within upstream: BUG: KASAN: slab-out-of-bounds in ntfs_list_ea fs/ntfs3/xattr.c:191 [inline] BUG: KASAN: slab-out-of-bounds in ntfs_listxattr+0x401/0x570 fs/ntfs3/xattr.c:710 Read of size 1 at addr ffff888021acaf3d by task syz-executor128/3632 Call Trace: kasan_report+0x139/0x170 mm/kasan/report.c:495 ntfs_list_ea fs/ntfs3/xattr.c:191 [inline] ntfs_listxattr+0x401/0x570 fs/ntfs3/xattr.c:710 vfs_listxattr fs/xattr.c:457 [inline] listxattr+0x293/0x2d0 fs/xattr.c:804 path_listxattr fs/xattr.c:828 [inline] __do_sys_llistxattr fs/xattr.c:846 [inline] Before derefering field members of `ea` in unpacked_ea_size(), we need to check whether the EA_FULL struct is located in access validate range. Similarly, when derefering `ea->name` field member, we need to check whethe the ea->name is located in access validate range, too. Fixes: be71b5cba2e6 ("fs/ntfs3: Add attrib operations") Reported-by: syzbot+9fcea5ef6dc4dc72d334@syzkaller.appspotmail.com Signed-off-by: Zeng Heng [almaz.alexandrovich@paragon-software.com: took the ret variable out of the loop block] Signed-off-by: Konstantin Komarov --- fs/ntfs3/xattr.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 29fd391899e5..4920548192a0 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -211,7 +211,8 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, size = le32_to_cpu(info->size); /* Enumerate all xattrs. */ - for (ret = 0, off = 0; off < size; off += ea_size) { + ret = 0; + for (off = 0; off + sizeof(struct EA_FULL) < size; off += ea_size) { ea = Add2Ptr(ea_all, off); ea_size = unpacked_ea_size(ea); @@ -219,6 +220,10 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, break; if (buffer) { + /* Check if we can use field ea->name */ + if (off + ea_size > size) + break; + if (ret + ea->name_len + 1 > bytes_per_buffer) { err = -ERANGE; goto out; -- cgit v1.2.3 From 34e6552a442f268eefd408e47f4f2d471aa64829 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 13 Jul 2023 22:41:46 +0300 Subject: fs/ntfs3: Fix OOB read in ntfs_init_from_boot Syzbot was able to create a device which has the last sector of size 512. After failing to boot from initial sector, reading from boot info from offset 511 causes OOB read. To prevent such reports add sanity check to validate if size of buffer_head if big enough to hold ntfs3 bootinfo Fixes: 6a4cd3ea7d77 ("fs/ntfs3: Alternative boot if primary boot is corrupted") Reported-by: syzbot+53ce40c8c0322c06aea5@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index cf0a720523f0..5341ed80e2d2 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -878,6 +878,11 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, check_boot: err = -EINVAL; + + /* Corrupted image; do not read OOB */ + if (bh->b_size - sizeof(*boot) < boot_off) + goto out; + boot = (struct NTFS_BOOT *)Add2Ptr(bh->b_data, boot_off); if (memcmp(boot->system_id, "NTFS ", sizeof("NTFS ") - 1)) { -- cgit v1.2.3 From 1f9b94af923c88539426ed811ae7e9543834a5c5 Mon Sep 17 00:00:00 2001 From: Ziqi Zhao Date: Wed, 9 Aug 2023 12:11:18 -0700 Subject: fs/ntfs3: Fix possible null-pointer dereference in hdr_find_e() Upon investigation of the C reproducer provided by Syzbot, it seemed the reproducer was trying to mount a corrupted NTFS filesystem, then issue a rename syscall to some nodes in the filesystem. This can be shown by modifying the reproducer to only include the mount syscall, and investigating the filesystem by e.g. `ls` and `rm` commands. As a result, during the problematic call to `hdr_fine_e`, the `inode` being supplied did not go through `indx_init`, hence the `cmp` function pointer was never set. The fix is simply to check whether `cmp` is not set, and return NULL if that's the case, in order to be consistent with other error scenarios of the `hdr_find_e` method. The rationale behind this patch is that: - We should prevent crashing the kernel even if the mounted filesystem is corrupted. Any syscalls made on the filesystem could return invalid, but the kernel should be able to sustain these calls. - Only very specific corruption would lead to this bug, so it would be a pretty rare case in actual usage anyways. Therefore, introducing a check to specifically protect against this bug seems appropriate. Because of its rarity, an `unlikely` clause is used to wrap around this nullity check. Reported-by: syzbot+60cf892fc31d1f4358fc@syzkaller.appspotmail.com Signed-off-by: Ziqi Zhao Signed-off-by: Konstantin Komarov --- fs/ntfs3/index.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 124c6e822623..cf92b2433f7a 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -729,6 +729,9 @@ static struct NTFS_DE *hdr_find_e(const struct ntfs_index *indx, u32 total = le32_to_cpu(hdr->total); u16 offs[128]; + if (unlikely(!cmp)) + return NULL; + fill_table: if (end > total) return NULL; -- cgit v1.2.3 From 85a4780dc96ed9dd643bbadf236552b3320fae26 Mon Sep 17 00:00:00 2001 From: Gabriel Marcano Date: Tue, 12 Sep 2023 21:50:32 -0700 Subject: fs/ntfs3: Fix directory element type detection Calling stat() from userspace correctly identified junctions in an NTFS partition as symlinks, but using readdir() and iterating through the directory containing the same junction did not identify the junction as a symlink. When emitting directory contents, check FILE_ATTRIBUTE_REPARSE_POINT attribute to detect junctions and report them as links. Signed-off-by: Gabriel Marcano Signed-off-by: Konstantin Komarov --- fs/ntfs3/dir.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 063a6654199b..ec0566b322d5 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -309,7 +309,11 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, return 0; } - dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + /* NTFS: symlinks are "dir + reparse" or "file + reparse" */ + if (fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) + dt_type = DT_LNK; + else + dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); } -- cgit v1.2.3 From e4494770a5cad3c9d1d2a65ed15d07656c0d9b82 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Mon, 25 Sep 2023 12:48:07 +0800 Subject: fs/ntfs3: Avoid possible memory leak smatch warn: fs/ntfs3/fslog.c:2172 last_log_lsn() warn: possible memory leak of 'page_bufs' Jump to label 'out' to free 'page_bufs' and is more consistent with other code. Signed-off-by: Su Hui Signed-off-by: Konstantin Komarov --- fs/ntfs3/fslog.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 12f28cdf5c83..98ccb6650858 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -2168,8 +2168,10 @@ file_is_valid: if (!page) { page = kmalloc(log->page_size, GFP_NOFS); - if (!page) - return -ENOMEM; + if (!page) { + err = -ENOMEM; + goto out; + } } /* -- cgit v1.2.3 From 3b678768c0458e6d8d45fadf61423e44effed4cb Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 29 Sep 2023 22:53:36 +0530 Subject: powerpc/pseries: Fix STK_PARAM access in the hcall tracing code In powerpc pseries system, below behaviour is observed while enabling tracing on hcall: # cd /sys/kernel/debug/tracing/ # cat events/powerpc/hcall_exit/enable 0 # echo 1 > events/powerpc/hcall_exit/enable # ls -bash: fork: Bad address Above is from power9 lpar with latest kernel. Past this, softlockup is observed. Initially while attempting via perf_event_open to use "PERF_TYPE_TRACEPOINT", kernel panic was observed. perf config used: ================ memset(&pe[1],0,sizeof(struct perf_event_attr)); pe[1].type=PERF_TYPE_TRACEPOINT; pe[1].size=96; pe[1].config=0x26ULL; /* 38 raw_syscalls/sys_exit */ pe[1].sample_type=0; /* 0 */ pe[1].read_format=PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING|PERF_FORMAT_ID|PERF_FORMAT_GROUP|0x10ULL; /* 1f */ pe[1].inherit=1; pe[1].precise_ip=0; /* arbitrary skid */ pe[1].wakeup_events=0; pe[1].bp_type=HW_BREAKPOINT_EMPTY; pe[1].config1=0x1ULL; Kernel panic logs: ================== Kernel attempted to read user page (8) - exploit attempt? (uid: 0) BUG: Kernel NULL pointer dereference on read at 0x00000008 Faulting instruction address: 0xc0000000004c2814 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: nfnetlink bonding tls rfkill sunrpc dm_service_time dm_multipath pseries_rng xts vmx_crypto xfs libcrc32c sd_mod t10_pi crc64_rocksoft crc64 sg ibmvfc scsi_transport_fc ibmveth dm_mirror dm_region_hash dm_log dm_mod fuse CPU: 0 PID: 1431 Comm: login Not tainted 6.4.0+ #1 Hardware name: IBM,8375-42A POWER9 (raw) 0x4e0202 0xf000005 of:IBM,FW950.30 (VL950_892) hv:phyp pSeries NIP page_remove_rmap+0x44/0x320 LR wp_page_copy+0x384/0xec0 Call Trace: 0xc00000001416e400 (unreliable) wp_page_copy+0x384/0xec0 __handle_mm_fault+0x9d4/0xfb0 handle_mm_fault+0xf0/0x350 ___do_page_fault+0x48c/0xc90 hash__do_page_fault+0x30/0x70 do_hash_fault+0x1a4/0x330 data_access_common_virt+0x198/0x1f0 --- interrupt: 300 at 0x7fffae971abc git bisect tracked this down to below commit: 'commit baa49d81a94b ("powerpc/pseries: hvcall stack frame overhead")' This commit changed STACK_FRAME_OVERHEAD (112 ) to STACK_FRAME_MIN_SIZE (32 ) since 32 bytes is the minimum size for ELFv2 stack. With the latest kernel, when running on ELFv2, STACK_FRAME_MIN_SIZE is used to allocate stack size. During plpar_hcall_trace, first call is made to HCALL_INST_PRECALL which saves the registers and allocates new stack frame. In the plpar_hcall_trace code, STK_PARAM is accessed at two places. 1. To save r4: std r4,STK_PARAM(R4)(r1) 2. To access r4 back: ld r12,STK_PARAM(R4)(r1) HCALL_INST_PRECALL precall allocates a new stack frame. So all the stack parameter access after the precall, needs to be accessed with +STACK_FRAME_MIN_SIZE. So the store instruction should be: std r4,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1) If the "std" is not updated with STACK_FRAME_MIN_SIZE, we will end up with overwriting stack contents and cause corruption. But instead of updating 'std', we can instead remove it since HCALL_INST_PRECALL already saves it to the correct location. similarly load instruction should be: ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1) Fix the load instruction to correctly access the stack parameter with +STACK_FRAME_MIN_SIZE and remove the store of r4 since the precall saves it correctly. Cc: stable@vger.kernel.org # v6.2+ Fixes: baa49d81a94b ("powerpc/pseries: hvcall stack frame overhead") Co-developed-by: Naveen N Rao Signed-off-by: Naveen N Rao Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://msgid.link/20230929172337.7906-1-atrajeev@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/hvCall.S | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index bae45b358a09..2addf2ea03f0 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -184,7 +184,6 @@ _GLOBAL_TOC(plpar_hcall) plpar_hcall_trace: HCALL_INST_PRECALL(R5) - std r4,STK_PARAM(R4)(r1) mr r0,r4 mr r4,r5 @@ -196,7 +195,7 @@ plpar_hcall_trace: HVSC - ld r12,STK_PARAM(R4)(r1) + ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1) std r4,0(r12) std r5,8(r12) std r6,16(r12) @@ -296,7 +295,6 @@ _GLOBAL_TOC(plpar_hcall9) plpar_hcall9_trace: HCALL_INST_PRECALL(R5) - std r4,STK_PARAM(R4)(r1) mr r0,r4 mr r4,r5 -- cgit v1.2.3 From dfb5f8cbd5992d5769edfd3e059fad9e0b8bdafb Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 29 Sep 2023 22:53:37 +0530 Subject: powerpc/pseries: Remove unused r0 in the hcall tracing code In the plpar_hcall trace code, currently we use r0 to store the value of r4. But this value is not used subsequently in the code. Hence remove this unused save to r0 in plpar_hcall and plpar_hcall9 Suggested-by: Naveen N Rao Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://msgid.link/20230929172337.7906-2-atrajeev@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/hvCall.S | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 2addf2ea03f0..2b0cac6fb61f 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -184,8 +184,6 @@ _GLOBAL_TOC(plpar_hcall) plpar_hcall_trace: HCALL_INST_PRECALL(R5) - mr r0,r4 - mr r4,r5 mr r5,r6 mr r6,r7 @@ -295,8 +293,6 @@ _GLOBAL_TOC(plpar_hcall9) plpar_hcall9_trace: HCALL_INST_PRECALL(R5) - mr r0,r4 - mr r4,r5 mr r5,r6 mr r6,r7 -- cgit v1.2.3 From 383eba9f9a7f4cd639d367ea5daa6df2be392c54 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 19 Sep 2023 14:42:22 +0200 Subject: power: supply: qcom_battmgr: fix battery_id type qcom_battmgr_update_request.battery_id is written to using cpu_to_le32() and should be of type __le32, just like all other 32bit integer requests for qcom_battmgr. Cc: stable@vger.kernel.org # 6.3 Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309162149.4owm9iXc-lkp@intel.com/ Fixes: 29e8142b5623 ("power: supply: Introduce Qualcomm PMIC GLINK power supply") Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20230919124222.1155894-1-sebastian.reichel@collabora.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/qcom_battmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c index de77df97b3a4..a05fd00711f6 100644 --- a/drivers/power/supply/qcom_battmgr.c +++ b/drivers/power/supply/qcom_battmgr.c @@ -105,7 +105,7 @@ struct qcom_battmgr_property_request { struct qcom_battmgr_update_request { struct pmic_glink_hdr hdr; - u32 battery_id; + __le32 battery_id; }; struct qcom_battmgr_charge_time_request { -- cgit v1.2.3 From 8894b432548851f705f72ff135d3dcbd442a18d1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 29 Sep 2023 12:16:49 +0200 Subject: power: supply: qcom_battmgr: fix enable request endianness Add the missing endianness conversion when sending the enable request so that the driver will work also on a hypothetical big-endian machine. This issue was reported by sparse. Fixes: 29e8142b5623 ("power: supply: Introduce Qualcomm PMIC GLINK power supply") Cc: stable@vger.kernel.org # 6.3 Cc: Bjorn Andersson Signed-off-by: Johan Hovold Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/20230929101649.20206-1-johan+linaro@kernel.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/qcom_battmgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c index a05fd00711f6..ec163d1bcd18 100644 --- a/drivers/power/supply/qcom_battmgr.c +++ b/drivers/power/supply/qcom_battmgr.c @@ -1282,9 +1282,9 @@ static void qcom_battmgr_enable_worker(struct work_struct *work) { struct qcom_battmgr *battmgr = container_of(work, struct qcom_battmgr, enable_work); struct qcom_battmgr_enable_request req = { - .hdr.owner = PMIC_GLINK_OWNER_BATTMGR, - .hdr.type = PMIC_GLINK_NOTIFY, - .hdr.opcode = BATTMGR_REQUEST_NOTIFICATION, + .hdr.owner = cpu_to_le32(PMIC_GLINK_OWNER_BATTMGR), + .hdr.type = cpu_to_le32(PMIC_GLINK_NOTIFY), + .hdr.opcode = cpu_to_le32(BATTMGR_REQUEST_NOTIFICATION), }; int ret; -- cgit v1.2.3 From 34f08eb0ba6e4869bbfb682bf3d7d0494ffd2f87 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Thu, 13 Jul 2023 04:14:29 -0400 Subject: usb: cdnsp: Fixes issue with dequeuing not queued requests Gadget ACM while unloading module try to dequeue not queued usb request which causes the kernel to crash. Patch adds extra condition to check whether usb request is processed by CDNSP driver. cc: stable@vger.kernel.org Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Acked-by: Peter Chen Link: https://lore.kernel.org/r/20230713081429.326660-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-gadget.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index fff9ec9c391f..4b67749edb99 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -1125,6 +1125,9 @@ static int cdnsp_gadget_ep_dequeue(struct usb_ep *ep, unsigned long flags; int ret; + if (request->status != -EINPROGRESS) + return 0; + if (!pep->endpoint.desc) { dev_err(pdev->dev, "%s: can't dequeue to disabled endpoint\n", -- cgit v1.2.3 From 6658a62e1ddf726483cb2d8bf45ea3f9bd533074 Mon Sep 17 00:00:00 2001 From: Xingxing Luo Date: Fri, 22 Sep 2023 15:59:29 +0800 Subject: usb: musb: Modify the "HWVers" register address musb HWVers rgister address is not 0x69, if we operate the wrong address 0x69, it will cause a kernel crash, because there is no register corresponding to this address in the additional control register of musb. In fact, HWVers has been defined in musb_register.h, and the name is "MUSB_HWVERS", so We need to use this macro instead of 0x69. Fixes: c2365ce5d5a0 ("usb: musb: replace hard coded registers with defines") Cc: stable@vger.kernel.org Signed-off-by: Xingxing Luo Link: https://lore.kernel.org/r/20230922075929.31074-1-xingxing.luo@unisoc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_debugfs.c b/drivers/usb/musb/musb_debugfs.c index 78c726a71b17..2d623284edf6 100644 --- a/drivers/usb/musb/musb_debugfs.c +++ b/drivers/usb/musb/musb_debugfs.c @@ -39,7 +39,7 @@ static const struct musb_register_map musb_regmap[] = { { "IntrUsbE", MUSB_INTRUSBE, 8 }, { "DevCtl", MUSB_DEVCTL, 8 }, { "VControl", 0x68, 32 }, - { "HWVers", 0x69, 16 }, + { "HWVers", MUSB_HWVERS, 16 }, { "LinkInfo", MUSB_LINKINFO, 8 }, { "VPLen", MUSB_VPLEN, 8 }, { "HS_EOF1", MUSB_HS_EOF1, 8 }, -- cgit v1.2.3 From 33d7e37232155aadebe4145dcc592f00dabd7a2b Mon Sep 17 00:00:00 2001 From: Xingxing Luo Date: Tue, 19 Sep 2023 11:30:55 +0800 Subject: usb: musb: Get the musb_qh poniter after musb_giveback When multiple threads are performing USB transmission, musb->lock will be unlocked when musb_giveback is executed. At this time, qh may be released in the dequeue process in other threads, resulting in a wild pointer, so it needs to be here get qh again, and judge whether qh is NULL, and when dequeue, you need to set qh to NULL. Fixes: dbac5d07d13e ("usb: musb: host: don't start next rx urb if current one failed") Cc: stable@vger.kernel.org Signed-off-by: Xingxing Luo Link: https://lore.kernel.org/r/20230919033055.14085-1-xingxing.luo@unisoc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index a02c29216955..bc4507781167 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -321,10 +321,16 @@ static void musb_advance_schedule(struct musb *musb, struct urb *urb, musb_giveback(musb, urb, status); qh->is_ready = ready; + /* + * musb->lock had been unlocked in musb_giveback, so qh may + * be freed, need to get it again + */ + qh = musb_ep_get_qh(hw_ep, is_in); + /* reclaim resources (and bandwidth) ASAP; deschedule it, and * invalidate qh as soon as list_empty(&hep->urb_list) */ - if (list_empty(&qh->hep->urb_list)) { + if (qh && list_empty(&qh->hep->urb_list)) { struct list_head *head; struct dma_controller *dma = musb->dma_controller; @@ -2398,6 +2404,7 @@ static int musb_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) * and its URB list has emptied, recycle this qh. */ if (ready && list_empty(&qh->hep->urb_list)) { + musb_ep_set_qh(qh->hw_ep, is_in, NULL); qh->hep->hcpriv = NULL; list_del(&qh->ring); kfree(qh); -- cgit v1.2.3 From 427694cfaafa565a3db5c5ea71df6bc095dca92f Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Wed, 27 Sep 2023 16:28:58 +0530 Subject: usb: gadget: ncm: Handle decoding of multiple NTB's in unwrap call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When NCM is used with hosts like Windows PC, it is observed that there are multiple NTB's contained in one usb request giveback. Since the driver unwraps the obtained request data assuming only one NTB is present, we loose the subsequent NTB's present resulting in data loss. Fix this by checking the parsed block length with the obtained data length in usb request and continue parsing after the last byte of current NTB. Cc: stable@vger.kernel.org Fixes: 9f6ce4240a2b ("usb: gadget: f_ncm.c added") Signed-off-by: Krishna Kurapati Reviewed-by: Maciej Żenczykowski Link: https://lore.kernel.org/r/20230927105858.12950-1-quic_kriskura@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index feccf4c8cc4f..e6ab8cc225ff 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1156,7 +1156,8 @@ static int ncm_unwrap_ntb(struct gether *port, struct sk_buff_head *list) { struct f_ncm *ncm = func_to_ncm(&port->func); - __le16 *tmp = (void *) skb->data; + unsigned char *ntb_ptr = skb->data; + __le16 *tmp; unsigned index, index2; int ndp_index; unsigned dg_len, dg_len2; @@ -1169,6 +1170,10 @@ static int ncm_unwrap_ntb(struct gether *port, const struct ndp_parser_opts *opts = ncm->parser_opts; unsigned crc_len = ncm->is_crc ? sizeof(uint32_t) : 0; int dgram_counter; + int to_process = skb->len; + +parse_ntb: + tmp = (__le16 *)ntb_ptr; /* dwSignature */ if (get_unaligned_le32(tmp) != opts->nth_sign) { @@ -1215,7 +1220,7 @@ static int ncm_unwrap_ntb(struct gether *port, * walk through NDP * dwSignature */ - tmp = (void *)(skb->data + ndp_index); + tmp = (__le16 *)(ntb_ptr + ndp_index); if (get_unaligned_le32(tmp) != ncm->ndp_sign) { INFO(port->func.config->cdev, "Wrong NDP SIGN\n"); goto err; @@ -1272,11 +1277,11 @@ static int ncm_unwrap_ntb(struct gether *port, if (ncm->is_crc) { uint32_t crc, crc2; - crc = get_unaligned_le32(skb->data + + crc = get_unaligned_le32(ntb_ptr + index + dg_len - crc_len); crc2 = ~crc32_le(~0, - skb->data + index, + ntb_ptr + index, dg_len - crc_len); if (crc != crc2) { INFO(port->func.config->cdev, @@ -1303,7 +1308,7 @@ static int ncm_unwrap_ntb(struct gether *port, dg_len - crc_len); if (skb2 == NULL) goto err; - skb_put_data(skb2, skb->data + index, + skb_put_data(skb2, ntb_ptr + index, dg_len - crc_len); skb_queue_tail(list, skb2); @@ -1316,10 +1321,17 @@ static int ncm_unwrap_ntb(struct gether *port, } while (ndp_len > 2 * (opts->dgram_item_len * 2)); } while (ndp_index); - dev_consume_skb_any(skb); - VDBG(port->func.config->cdev, "Parsed NTB with %d frames\n", dgram_counter); + + to_process -= block_len; + if (to_process != 0) { + ntb_ptr = (unsigned char *)(ntb_ptr + block_len); + goto parse_ntb; + } + + dev_consume_skb_any(skb); + return 0; err: skb_queue_purge(list); -- cgit v1.2.3 From 76750f1dcad3e1af2295cdf2f9434e06e3178ef3 Mon Sep 17 00:00:00 2001 From: Hui Liu Date: Thu, 31 Aug 2023 18:19:45 +0800 Subject: usb: typec: qcom: Update the logic of regulator enable and disable Removed the call logic of disable and enable regulator in reset function. Enable the regulator in qcom_pmic_typec_start function and disable it in qcom_pmic_typec_stop function to avoid unbalanced regulator disable warnings. Fixes: a4422ff22142 ("usb: typec: qcom: Add Qualcomm PMIC Type-C driver") Cc: stable Reviewed-by: Bryan O'Donoghue Acked-by: Bryan O'Donoghue Tested-by: Bryan O'Donoghue # rb5 Signed-off-by: Hui Liu Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20230831-qcom-tcpc-v5-1-5e2661dc6c1d@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c index bb0b8479d80f..52c81378e36e 100644 --- a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c +++ b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c @@ -381,10 +381,6 @@ static int qcom_pmic_typec_pdphy_enable(struct pmic_typec_pdphy *pmic_typec_pdph struct device *dev = pmic_typec_pdphy->dev; int ret; - ret = regulator_enable(pmic_typec_pdphy->vdd_pdphy); - if (ret) - return ret; - /* PD 2.0, DR=TYPEC_DEVICE, PR=TYPEC_SINK */ ret = regmap_update_bits(pmic_typec_pdphy->regmap, pmic_typec_pdphy->base + USB_PDPHY_MSG_CONFIG_REG, @@ -422,8 +418,6 @@ static int qcom_pmic_typec_pdphy_disable(struct pmic_typec_pdphy *pmic_typec_pdp ret = regmap_write(pmic_typec_pdphy->regmap, pmic_typec_pdphy->base + USB_PDPHY_EN_CONTROL_REG, 0); - regulator_disable(pmic_typec_pdphy->vdd_pdphy); - return ret; } @@ -447,6 +441,10 @@ int qcom_pmic_typec_pdphy_start(struct pmic_typec_pdphy *pmic_typec_pdphy, int i; int ret; + ret = regulator_enable(pmic_typec_pdphy->vdd_pdphy); + if (ret) + return ret; + pmic_typec_pdphy->tcpm_port = tcpm_port; ret = pmic_typec_pdphy_reset(pmic_typec_pdphy); @@ -467,6 +465,8 @@ void qcom_pmic_typec_pdphy_stop(struct pmic_typec_pdphy *pmic_typec_pdphy) disable_irq(pmic_typec_pdphy->irq_data[i].irq); qcom_pmic_typec_pdphy_reset_on(pmic_typec_pdphy); + + regulator_disable(pmic_typec_pdphy->vdd_pdphy); } struct pmic_typec_pdphy *qcom_pmic_typec_pdphy_alloc(struct device *dev) -- cgit v1.2.3 From f74a7afc224acd5e922c7a2e52244d891bbe44ee Mon Sep 17 00:00:00 2001 From: Ricardo Cañuelo Date: Wed, 30 Aug 2023 12:04:18 +0200 Subject: usb: hub: Guard against accesses to uninitialized BOS descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many functions in drivers/usb/core/hub.c and drivers/usb/core/hub.h access fields inside udev->bos without checking if it was allocated and initialized. If usb_get_bos_descriptor() fails for whatever reason, udev->bos will be NULL and those accesses will result in a crash: BUG: kernel NULL pointer dereference, address: 0000000000000018 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 5 PID: 17818 Comm: kworker/5:1 Tainted: G W 5.15.108-18910-gab0e1cb584e1 #1 Hardware name: Google Kindred/Kindred, BIOS Google_Kindred.12672.413.0 02/03/2021 Workqueue: usb_hub_wq hub_event RIP: 0010:hub_port_reset+0x193/0x788 Code: 89 f7 e8 20 f7 15 00 48 8b 43 08 80 b8 96 03 00 00 03 75 36 0f b7 88 92 03 00 00 81 f9 10 03 00 00 72 27 48 8b 80 a8 03 00 00 <48> 83 78 18 00 74 19 48 89 df 48 8b 75 b0 ba 02 00 00 00 4c 89 e9 RSP: 0018:ffffab740c53fcf8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffffa1bc5f678000 RCX: 0000000000000310 RDX: fffffffffffffdff RSI: 0000000000000286 RDI: ffffa1be9655b840 RBP: ffffab740c53fd70 R08: 00001b7d5edaa20c R09: ffffffffb005e060 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000 R13: ffffab740c53fd3e R14: 0000000000000032 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffffa1be96540000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000018 CR3: 000000022e80c005 CR4: 00000000003706e0 Call Trace: hub_event+0x73f/0x156e ? hub_activate+0x5b7/0x68f process_one_work+0x1a2/0x487 worker_thread+0x11a/0x288 kthread+0x13a/0x152 ? process_one_work+0x487/0x487 ? kthread_associate_blkcg+0x70/0x70 ret_from_fork+0x1f/0x30 Fall back to a default behavior if the BOS descriptor isn't accessible and skip all the functionalities that depend on it: LPM support checks, Super Speed capabilitiy checks, U1/U2 states setup. Signed-off-by: Ricardo Cañuelo Cc: stable Link: https://lore.kernel.org/r/20230830100418.1952143-1-ricardo.canuelo@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 25 ++++++++++++++++++++++--- drivers/usb/core/hub.h | 2 +- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 3c54b218301c..0ff47eeffb49 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -151,6 +151,10 @@ int usb_device_supports_lpm(struct usb_device *udev) if (udev->quirks & USB_QUIRK_NO_LPM) return 0; + /* Skip if the device BOS descriptor couldn't be read */ + if (!udev->bos) + return 0; + /* USB 2.1 (and greater) devices indicate LPM support through * their USB 2.0 Extended Capabilities BOS descriptor. */ @@ -327,6 +331,10 @@ static void usb_set_lpm_parameters(struct usb_device *udev) if (!udev->lpm_capable || udev->speed < USB_SPEED_SUPER) return; + /* Skip if the device BOS descriptor couldn't be read */ + if (!udev->bos) + return; + hub = usb_hub_to_struct_hub(udev->parent); /* It doesn't take time to transition the roothub into U0, since it * doesn't have an upstream link. @@ -2704,13 +2712,17 @@ out_authorized: static enum usb_ssp_rate get_port_ssp_rate(struct usb_device *hdev, u32 ext_portstatus) { - struct usb_ssp_cap_descriptor *ssp_cap = hdev->bos->ssp_cap; + struct usb_ssp_cap_descriptor *ssp_cap; u32 attr; u8 speed_id; u8 ssac; u8 lanes; int i; + if (!hdev->bos) + goto out; + + ssp_cap = hdev->bos->ssp_cap; if (!ssp_cap) goto out; @@ -4215,8 +4227,15 @@ static void usb_enable_link_state(struct usb_hcd *hcd, struct usb_device *udev, enum usb3_link_state state) { int timeout; - __u8 u1_mel = udev->bos->ss_cap->bU1devExitLat; - __le16 u2_mel = udev->bos->ss_cap->bU2DevExitLat; + __u8 u1_mel; + __le16 u2_mel; + + /* Skip if the device BOS descriptor couldn't be read */ + if (!udev->bos) + return; + + u1_mel = udev->bos->ss_cap->bU1devExitLat; + u2_mel = udev->bos->ss_cap->bU2DevExitLat; /* If the device says it doesn't have *any* exit latency to come out of * U1 or U2, it's probably lying. Assume it doesn't implement that link diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index 37897afd1b64..d44dd7f6623e 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -153,7 +153,7 @@ static inline int hub_is_superspeedplus(struct usb_device *hdev) { return (hdev->descriptor.bDeviceProtocol == USB_HUB_PR_SS && le16_to_cpu(hdev->descriptor.bcdUSB) >= 0x0310 && - hdev->bos->ssp_cap); + hdev->bos && hdev->bos->ssp_cap); } static inline unsigned hub_power_on_good_delay(struct usb_hub *hub) -- cgit v1.2.3 From 8bea147dfdf823eaa8d3baeccc7aeb041b41944b Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 13 Sep 2023 00:52:15 +0000 Subject: usb: dwc3: Soft reset phy on probe for host When there's phy initialization, we need to initiate a soft-reset sequence. That's done through USBCMD.HCRST in the xHCI driver and its initialization, However, the dwc3 driver may modify core configs before the soft-reset. This may result in some connection instability. So, ensure the phy is ready before the controller updates the GCTL.PRTCAPDIR or other settings by issuing phy soft-reset. Note that some host-mode configurations may not expose device registers to initiate the controller soft-reset (via DCTL.CoreSftRst). So we reset through GUSB3PIPECTL and GUSB2PHYCFG instead. Cc: stable@vger.kernel.org Fixes: e835c0a4e23c ("usb: dwc3: don't reset device side if dwc3 was configured as host-only") Reported-by: Kenta Sato Closes: https://lore.kernel.org/linux-usb/ZPUciRLUcjDywMVS@debian.me/ Signed-off-by: Thinh Nguyen Tested-by: Kenta Sato Link: https://lore.kernel.org/r/70aea513215d273669152696cc02b20ddcdb6f1a.1694564261.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 9c6bf054f15d..343d2570189f 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -279,9 +279,46 @@ int dwc3_core_soft_reset(struct dwc3 *dwc) * XHCI driver will reset the host block. If dwc3 was configured for * host-only mode or current role is host, then we can return early. */ - if (dwc->dr_mode == USB_DR_MODE_HOST || dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST) + if (dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST) return 0; + /* + * If the dr_mode is host and the dwc->current_dr_role is not the + * corresponding DWC3_GCTL_PRTCAP_HOST, then the dwc3_core_init_mode + * isn't executed yet. Ensure the phy is ready before the controller + * updates the GCTL.PRTCAPDIR or other settings by soft-resetting + * the phy. + * + * Note: GUSB3PIPECTL[n] and GUSB2PHYCFG[n] are port settings where n + * is port index. If this is a multiport host, then we need to reset + * all active ports. + */ + if (dwc->dr_mode == USB_DR_MODE_HOST) { + u32 usb3_port; + u32 usb2_port; + + usb3_port = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)); + usb3_port |= DWC3_GUSB3PIPECTL_PHYSOFTRST; + dwc3_writel(dwc->regs, DWC3_GUSB3PIPECTL(0), usb3_port); + + usb2_port = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + usb2_port |= DWC3_GUSB2PHYCFG_PHYSOFTRST; + dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), usb2_port); + + /* Small delay for phy reset assertion */ + usleep_range(1000, 2000); + + usb3_port &= ~DWC3_GUSB3PIPECTL_PHYSOFTRST; + dwc3_writel(dwc->regs, DWC3_GUSB3PIPECTL(0), usb3_port); + + usb2_port &= ~DWC3_GUSB2PHYCFG_PHYSOFTRST; + dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), usb2_port); + + /* Wait for clock synchronization */ + msleep(50); + return 0; + } + reg = dwc3_readl(dwc->regs, DWC3_DCTL); reg |= DWC3_DCTL_CSFTRST; reg &= ~DWC3_DCTL_RUN_STOP; -- cgit v1.2.3 From 9f35d612da5592f1bf1cae44ec1e023df37bea12 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Tue, 26 Sep 2023 15:53:33 +0800 Subject: usb: cdns3: Modify the return value of cdns_set_active () to void when CONFIG_PM_SLEEP is disabled The return type of cdns_set_active () is inconsistent depending on whether CONFIG_PM_SLEEP is enabled, so the return value is modified to void type. Reported-by: Pavel Machek Closes: https://lore.kernel.org/all/ZP7lIKUzD68XA91j@duo.ucw.cz/ Fixes: 2319b9c87fe2 ("usb: cdns3: Put the cdns set active part outside the spin lock") Cc: stable@vger.kernel.org Signed-off-by: Xiaolei Wang Reviewed-by: Pavel Machek Reviewed-by: Roger Quadros Acked-by: Peter Chen Link: https://lore.kernel.org/r/20230926075333.1791011-1-xiaolei.wang@windriver.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/core.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/core.h b/drivers/usb/cdns3/core.h index 4a4dbc2c1561..81a9c9d6be08 100644 --- a/drivers/usb/cdns3/core.h +++ b/drivers/usb/cdns3/core.h @@ -131,8 +131,7 @@ void cdns_set_active(struct cdns *cdns, u8 set_active); #else /* CONFIG_PM_SLEEP */ static inline int cdns_resume(struct cdns *cdns) { return 0; } -static inline int cdns_set_active(struct cdns *cdns, u8 set_active) -{ return 0; } +static inline void cdns_set_active(struct cdns *cdns, u8 set_active) { } static inline int cdns_suspend(struct cdns *cdns) { return 0; } #endif /* CONFIG_PM_SLEEP */ -- cgit v1.2.3 From 3061b6491f491197a35e14e49f805d661b02acd4 Mon Sep 17 00:00:00 2001 From: Piyush Mehta Date: Fri, 29 Sep 2023 17:45:14 +0530 Subject: usb: gadget: udc-xilinx: replace memcpy with memcpy_toio For ARM processor, unaligned access to device memory is not allowed. Method memcpy does not take care of alignment. USB detection failure with the unalingned address of memory, with below kernel crash. To fix the unalingned address kernel panic, replace memcpy with memcpy_toio method. Kernel crash: Unable to handle kernel paging request at virtual address ffff80000c05008a Mem abort info: ESR = 0x96000061 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x21: alignment fault Data abort info: ISV = 0, ISS = 0x00000061 CM = 0, WnR = 1 swapper pgtable: 4k pages, 48-bit VAs, pgdp=000000000143b000 [ffff80000c05008a] pgd=100000087ffff003, p4d=100000087ffff003, pud=100000087fffe003, pmd=1000000800bcc003, pte=00680000a0010713 Internal error: Oops: 96000061 [#1] SMP Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.15.19-xilinx-v2022.1 #1 Hardware name: ZynqMP ZCU102 Rev1.0 (DT) pstate: 200000c5 (nzCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __memcpy+0x30/0x260 lr : __xudc_ep0_queue+0xf0/0x110 sp : ffff800008003d00 x29: ffff800008003d00 x28: ffff800009474e80 x27: 00000000000000a0 x26: 0000000000000100 x25: 0000000000000012 x24: ffff000800bc8080 x23: 0000000000000001 x22: 0000000000000012 x21: ffff000800bc8080 x20: 0000000000000012 x19: ffff000800bc8080 x18: 0000000000000000 x17: ffff800876482000 x16: ffff800008004000 x15: 0000000000004000 x14: 00001f09785d0400 x13: 0103020101005567 x12: 0781400000000200 x11: 00000000c5672a10 x10: 00000000000008d0 x9 : ffff800009463cf0 x8 : ffff8000094757b0 x7 : 0201010055670781 x6 : 4000000002000112 x5 : ffff80000c05009a x4 : ffff000800a15012 x3 : ffff00080362ad80 x2 : 0000000000000012 x1 : ffff000800a15000 x0 : ffff80000c050088 Call trace: __memcpy+0x30/0x260 xudc_ep0_queue+0x3c/0x60 usb_ep_queue+0x38/0x44 composite_ep0_queue.constprop.0+0x2c/0xc0 composite_setup+0x8d0/0x185c configfs_composite_setup+0x74/0xb0 xudc_irq+0x570/0xa40 __handle_irq_event_percpu+0x58/0x170 handle_irq_event+0x60/0x120 handle_fasteoi_irq+0xc0/0x220 handle_domain_irq+0x60/0x90 gic_handle_irq+0x74/0xa0 call_on_irq_stack+0x2c/0x60 do_interrupt_handler+0x54/0x60 el1_interrupt+0x30/0x50 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x78/0x7c arch_cpu_idle+0x18/0x2c do_idle+0xdc/0x15c cpu_startup_entry+0x28/0x60 rest_init+0xc8/0xe0 arch_call_rest_init+0x10/0x1c start_kernel+0x694/0x6d4 __primary_switched+0xa4/0xac Fixes: 1f7c51660034 ("usb: gadget: Add xilinx usb2 device support") Reported-by: kernel test robot Closes: https://lore.kernel.org/all/202209020044.CX2PfZzM-lkp@intel.com/ Cc: stable@vger.kernel.org Signed-off-by: Piyush Mehta Link: https://lore.kernel.org/r/20230929121514.13475-1-piyush.mehta@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/udc-xilinx.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/udc/udc-xilinx.c b/drivers/usb/gadget/udc/udc-xilinx.c index 56b8286a8009..74590f93ea61 100644 --- a/drivers/usb/gadget/udc/udc-xilinx.c +++ b/drivers/usb/gadget/udc/udc-xilinx.c @@ -497,11 +497,13 @@ static int xudc_eptxrx(struct xusb_ep *ep, struct xusb_req *req, /* Get the Buffer address and copy the transmit data.*/ eprambase = (u32 __force *)(udc->addr + ep->rambase); if (ep->is_in) { - memcpy(eprambase, bufferptr, bytestosend); + memcpy_toio((void __iomem *)eprambase, bufferptr, + bytestosend); udc->write_fn(udc->addr, ep->offset + XUSB_EP_BUF0COUNT_OFFSET, bufferlen); } else { - memcpy(bufferptr, eprambase, bytestosend); + memcpy_toio((void __iomem *)bufferptr, eprambase, + bytestosend); } /* * Enable the buffer for transmission. @@ -515,11 +517,13 @@ static int xudc_eptxrx(struct xusb_ep *ep, struct xusb_req *req, eprambase = (u32 __force *)(udc->addr + ep->rambase + ep->ep_usb.maxpacket); if (ep->is_in) { - memcpy(eprambase, bufferptr, bytestosend); + memcpy_toio((void __iomem *)eprambase, bufferptr, + bytestosend); udc->write_fn(udc->addr, ep->offset + XUSB_EP_BUF1COUNT_OFFSET, bufferlen); } else { - memcpy(bufferptr, eprambase, bytestosend); + memcpy_toio((void __iomem *)bufferptr, eprambase, + bytestosend); } /* * Enable the buffer for transmission. @@ -1021,7 +1025,7 @@ static int __xudc_ep0_queue(struct xusb_ep *ep0, struct xusb_req *req) udc->addr); length = req->usb_req.actual = min_t(u32, length, EP0_MAX_PACKET); - memcpy(corebuf, req->usb_req.buf, length); + memcpy_toio((void __iomem *)corebuf, req->usb_req.buf, length); udc->write_fn(udc->addr, XUSB_EP_BUF0COUNT_OFFSET, length); udc->write_fn(udc->addr, XUSB_BUFFREADY_OFFSET, 1); } else { @@ -1752,7 +1756,7 @@ static void xudc_handle_setup(struct xusb_udc *udc) /* Load up the chapter 9 command buffer.*/ ep0rambase = (u32 __force *) (udc->addr + XUSB_SETUP_PKT_ADDR_OFFSET); - memcpy(&setup, ep0rambase, 8); + memcpy_toio((void __iomem *)&setup, ep0rambase, 8); udc->setup = setup; udc->setup.wValue = cpu_to_le16((u16 __force)setup.wValue); @@ -1839,7 +1843,7 @@ static void xudc_ep0_out(struct xusb_udc *udc) (ep0->rambase << 2)); buffer = req->usb_req.buf + req->usb_req.actual; req->usb_req.actual = req->usb_req.actual + bytes_to_rx; - memcpy(buffer, ep0rambase, bytes_to_rx); + memcpy_toio((void __iomem *)buffer, ep0rambase, bytes_to_rx); if (req->usb_req.length == req->usb_req.actual) { /* Data transfer completed get ready for Status stage */ @@ -1915,7 +1919,7 @@ static void xudc_ep0_in(struct xusb_udc *udc) (ep0->rambase << 2)); buffer = req->usb_req.buf + req->usb_req.actual; req->usb_req.actual = req->usb_req.actual + length; - memcpy(ep0rambase, buffer, length); + memcpy_toio((void __iomem *)ep0rambase, buffer, length); } udc->write_fn(udc->addr, XUSB_EP_BUF0COUNT_OFFSET, count); udc->write_fn(udc->addr, XUSB_BUFFREADY_OFFSET, 1); -- cgit v1.2.3 From 2ec8b010979036c2fe79a64adb6ecc0bd11e91d1 Mon Sep 17 00:00:00 2001 From: "William A. Kennington III" Date: Fri, 22 Sep 2023 11:28:12 -0700 Subject: spi: npcm-fiu: Fix UMA reads when dummy.nbytes == 0 We don't want to use the value of ilog2(0) as dummy.buswidth is 0 when dummy.nbytes is 0. Since we have no dummy bytes, we don't need to configure the dummy byte bits per clock register value anyway. Signed-off-by: "William A. Kennington III" Link: https://lore.kernel.org/r/20230922182812.2728066-1-william@wkennington.com Signed-off-by: Mark Brown --- drivers/spi/spi-npcm-fiu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-npcm-fiu.c b/drivers/spi/spi-npcm-fiu.c index 0ca21ff0e9cc..e42248519688 100644 --- a/drivers/spi/spi-npcm-fiu.c +++ b/drivers/spi/spi-npcm-fiu.c @@ -353,8 +353,9 @@ static int npcm_fiu_uma_read(struct spi_mem *mem, uma_cfg |= ilog2(op->cmd.buswidth); uma_cfg |= ilog2(op->addr.buswidth) << NPCM_FIU_UMA_CFG_ADBPCK_SHIFT; - uma_cfg |= ilog2(op->dummy.buswidth) - << NPCM_FIU_UMA_CFG_DBPCK_SHIFT; + if (op->dummy.nbytes) + uma_cfg |= ilog2(op->dummy.buswidth) + << NPCM_FIU_UMA_CFG_DBPCK_SHIFT; uma_cfg |= ilog2(op->data.buswidth) << NPCM_FIU_UMA_CFG_RDBPCK_SHIFT; uma_cfg |= op->dummy.nbytes << NPCM_FIU_UMA_CFG_DBSIZ_SHIFT; -- cgit v1.2.3 From e59e38158c61162f2e8beb4620df21a1585117df Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Mon, 11 Sep 2023 10:22:38 +0200 Subject: usb: misc: onboard_hub: add support for Microchip USB2412 USB 2.0 hub The USB2412 is a 2-Port USB 2.0 hub controller that provides a reset pin and a single 3v3 powre source, which makes it suitable to be controlled by the onboard_hub driver. This hub has the same reset timings as USB2514/2517 and the same onboard hub specific-data can be reused for USB2412. Signed-off-by: Javier Carrasco Cc: stable Acked-by: Matthias Kaehlcke Link: https://lore.kernel.org/r/20230911-topic-2412_onboard_hub-v1-1-7704181ddfff@wolfvision.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/onboard_usb_hub.c | 1 + drivers/usb/misc/onboard_usb_hub.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c index 3da1a4659c5f..57bbe1309094 100644 --- a/drivers/usb/misc/onboard_usb_hub.c +++ b/drivers/usb/misc/onboard_usb_hub.c @@ -434,6 +434,7 @@ static const struct usb_device_id onboard_hub_id_table[] = { { USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 */ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0610) }, /* Genesys Logic GL852G USB 2.0 */ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0620) }, /* Genesys Logic GL3523 USB 3.1 */ + { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2412) }, /* USB2412 USB 2.0 */ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2517) }, /* USB2517 USB 2.0 */ { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */ diff --git a/drivers/usb/misc/onboard_usb_hub.h b/drivers/usb/misc/onboard_usb_hub.h index 4026ba64c592..2a4ab5ac0ebe 100644 --- a/drivers/usb/misc/onboard_usb_hub.h +++ b/drivers/usb/misc/onboard_usb_hub.h @@ -47,6 +47,7 @@ static const struct onboard_hub_pdata vialab_vl817_data = { }; static const struct of_device_id onboard_hub_match[] = { + { .compatible = "usb424,2412", .data = µchip_usb424_data, }, { .compatible = "usb424,2514", .data = µchip_usb424_data, }, { .compatible = "usb424,2517", .data = µchip_usb424_data, }, { .compatible = "usb451,8140", .data = &ti_tusb8041_data, }, -- cgit v1.2.3 From a00e197daec52bcd955e118f5f57d706da5bfe50 Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Mon, 11 Sep 2023 14:34:15 +0530 Subject: usb: typec: ucsi: Clear EVENT_PENDING bit if ucsi_send_command fails Currently if ucsi_send_command() fails, then we bail out without clearing EVENT_PENDING flag. So when the next connector change event comes, ucsi_connector_change() won't queue the con->work, because of which none of the new events will be processed. Fix this by clearing EVENT_PENDING flag if ucsi_send_command() fails. Cc: stable@vger.kernel.org # 5.16 Fixes: 512df95b9432 ("usb: typec: ucsi: Better fix for missing unplug events issue") Signed-off-by: Prashanth K Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/1694423055-8440-1-git-send-email-quic_prashk@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index c6dfe3dff346..509c67c94a70 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -884,6 +884,7 @@ static void ucsi_handle_connector_change(struct work_struct *work) if (ret < 0) { dev_err(ucsi->dev, "%s: GET_CONNECTOR_STATUS failed (%d)\n", __func__, ret); + clear_bit(EVENT_PENDING, &con->ucsi->flags); goto out_unlock; } -- cgit v1.2.3 From 41a43013d2366db5b88b42bbcd8e8f040b6ccf21 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Fri, 15 Sep 2023 17:31:05 +0300 Subject: usb: xhci: xhci-ring: Use sysdev for mapping bounce buffer As mentioned in: commit 474ed23a6257 ("xhci: align the last trb before link if it is easily splittable.") A bounce buffer is utilized for ensuring that transfers that span across ring segments are aligned to the EP's max packet size. However, the device that is used to map the DMA buffer to is currently using the XHCI HCD, which does not carry any DMA operations in certain configrations. Migration to using the sysdev entry was introduced for DWC3 based implementations where the IOMMU operations are present. Replace the reference to the controller device to sysdev instead. This allows the bounce buffer to be properly mapped to any implementations that have an IOMMU involved. cc: stable@vger.kernel.org Fixes: 4c39d4b949d3 ("usb: xhci: use bus->sysdev for DMA configuration") Signed-off-by: Wesley Cheng Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230915143108.1532163-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 1dde53f6eb31..98389b568633 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -798,7 +798,7 @@ static void xhci_giveback_urb_in_irq(struct xhci_hcd *xhci, static void xhci_unmap_td_bounce_buffer(struct xhci_hcd *xhci, struct xhci_ring *ring, struct xhci_td *td) { - struct device *dev = xhci_to_hcd(xhci)->self.controller; + struct device *dev = xhci_to_hcd(xhci)->self.sysdev; struct xhci_segment *seg = td->bounce_seg; struct urb *urb = td->urb; size_t len; @@ -3469,7 +3469,7 @@ static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred, static int xhci_align_td(struct xhci_hcd *xhci, struct urb *urb, u32 enqd_len, u32 *trb_buff_len, struct xhci_segment *seg) { - struct device *dev = xhci_to_hcd(xhci)->self.controller; + struct device *dev = xhci_to_hcd(xhci)->self.sysdev; unsigned int unalign; unsigned int max_pkt; u32 new_buff_len; -- cgit v1.2.3 From d7cdfc319b2bcf6899ab0a05eec0958bc802a9a1 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 15 Sep 2023 17:31:06 +0300 Subject: xhci: track port suspend state correctly in unsuccessful resume cases xhci-hub.c tracks suspended ports in a suspended_port bitfield. This is checked when responding to a Get_Status(PORT) request to see if a port in running U0 state was recently resumed, and adds the required USB_PORT_STAT_C_SUSPEND change bit in those cases. The suspended_port bit was left uncleared if a device is disconnected during suspend. The bit remained set even when a new device was connected and enumerated. The set bit resulted in a incorrect Get_Status(PORT) response with a bogus USB_PORT_STAT_C_SUSPEND change bit set once the new device reached U0 link state. USB_PORT_STAT_C_SUSPEND change bit is only used for USB2 ports, but xhci-hub keeps track of both USB2 and USB3 suspended ports. Cc: stable@vger.kernel.org Reported-by: Wesley Cheng Closes: https://lore.kernel.org/linux-usb/d68aa806-b26a-0e43-42fb-b8067325e967@quicinc.com/ Fixes: 1d5810b6923c ("xhci: Rework port suspend structures for limited ports.") Tested-by: Wesley Cheng Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230915143108.1532163-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 0054d02239e2..0df5d807a77e 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1062,19 +1062,19 @@ static void xhci_get_usb3_port_status(struct xhci_port *port, u32 *status, *status |= USB_PORT_STAT_C_CONFIG_ERROR << 16; /* USB3 specific wPortStatus bits */ - if (portsc & PORT_POWER) { + if (portsc & PORT_POWER) *status |= USB_SS_PORT_STAT_POWER; - /* link state handling */ - if (link_state == XDEV_U0) - bus_state->suspended_ports &= ~(1 << portnum); - } - /* remote wake resume signaling complete */ - if (bus_state->port_remote_wakeup & (1 << portnum) && + /* no longer suspended or resuming */ + if (link_state != XDEV_U3 && link_state != XDEV_RESUME && link_state != XDEV_RECOVERY) { - bus_state->port_remote_wakeup &= ~(1 << portnum); - usb_hcd_end_port_resume(&hcd->self, portnum); + /* remote wake resume signaling complete */ + if (bus_state->port_remote_wakeup & (1 << portnum)) { + bus_state->port_remote_wakeup &= ~(1 << portnum); + usb_hcd_end_port_resume(&hcd->self, portnum); + } + bus_state->suspended_ports &= ~(1 << portnum); } xhci_hub_report_usb3_link_state(xhci, status, portsc); @@ -1131,6 +1131,7 @@ static void xhci_get_usb2_port_status(struct xhci_port *port, u32 *status, usb_hcd_end_port_resume(&port->rhub->hcd->self, portnum); } port->rexit_active = 0; + bus_state->suspended_ports &= ~(1 << portnum); } } -- cgit v1.2.3 From 15f3ef070933817fac2bcbdb9c85bff9e54e9f80 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 15 Sep 2023 17:31:07 +0300 Subject: xhci: Clear EHB bit only at end of interrupt handler The Event Handler Busy bit shall be cleared by software when the Event Ring is empty. The xHC is thereby informed that it may raise another interrupt once it has enqueued new events (sec 4.17.2). However since commit dc0ffbea5729 ("usb: host: xhci: update event ring dequeue pointer on purpose"), the EHB bit is already cleared after half a segment has been processed. As a result, spurious interrupts may occur: - xhci_irq() processes half a segment, clears EHB, continues processing remaining events. - xHC enqueues new events. Because EHB has been cleared, xHC sets Interrupt Pending bit. Interrupt moderation countdown begins. - Meanwhile xhci_irq() continues processing events. Interrupt moderation countdown reaches zero, so an MSI interrupt is signaled. - xhci_irq() empties the Event Ring, clears EHB again and is done. - Because an MSI interrupt has been signaled, xhci_irq() is run again. It discovers there's nothing to do and returns IRQ_NONE. Avoid by clearing the EHB bit only at the end of xhci_irq(). Fixes: dc0ffbea5729 ("usb: host: xhci: update event ring dequeue pointer on purpose") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v5.5+ Cc: Peter Chen Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230915143108.1532163-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 98389b568633..3e5dc0723a8f 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2996,7 +2996,8 @@ static int xhci_handle_event(struct xhci_hcd *xhci, struct xhci_interrupter *ir) */ static void xhci_update_erst_dequeue(struct xhci_hcd *xhci, struct xhci_interrupter *ir, - union xhci_trb *event_ring_deq) + union xhci_trb *event_ring_deq, + bool clear_ehb) { u64 temp_64; dma_addr_t deq; @@ -3017,12 +3018,13 @@ static void xhci_update_erst_dequeue(struct xhci_hcd *xhci, return; /* Update HC event ring dequeue pointer */ - temp_64 &= ERST_PTR_MASK; + temp_64 &= ERST_DESI_MASK; temp_64 |= ((u64) deq & (u64) ~ERST_PTR_MASK); } /* Clear the event handler busy flag (RW1C) */ - temp_64 |= ERST_EHB; + if (clear_ehb) + temp_64 |= ERST_EHB; xhci_write_64(xhci, temp_64, &ir->ir_set->erst_dequeue); } @@ -3103,7 +3105,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) while (xhci_handle_event(xhci, ir) > 0) { if (event_loop++ < TRBS_PER_SEGMENT / 2) continue; - xhci_update_erst_dequeue(xhci, ir, event_ring_deq); + xhci_update_erst_dequeue(xhci, ir, event_ring_deq, false); event_ring_deq = ir->event_ring->dequeue; /* ring is half-full, force isoc trbs to interrupt more often */ @@ -3113,7 +3115,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) event_loop = 0; } - xhci_update_erst_dequeue(xhci, ir, event_ring_deq); + xhci_update_erst_dequeue(xhci, ir, event_ring_deq, true); ret = IRQ_HANDLED; out: -- cgit v1.2.3 From cf97c5e0f7dda2edc15ecd96775fe6c355823784 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 15 Sep 2023 17:31:08 +0300 Subject: xhci: Preserve RsvdP bits in ERSTBA register correctly xhci_add_interrupter() erroneously preserves only the lowest 4 bits when writing the ERSTBA register, not the lowest 6 bits. Fix it. Migrate the ERST_BASE_RSVDP macro to the modern GENMASK_ULL() syntax to avoid a u64 cast. This was previously fixed by commit 8c1cbec9db1a ("xhci: fix event ring segment table related masks and variables in header"), but immediately undone by commit b17a57f89f69 ("xhci: Refactor interrupter code for initial multi interrupter support."). Fixes: b17a57f89f69 ("xhci: Refactor interrupter code for initial multi interrupter support.") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v6.3+ Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230915143108.1532163-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 4 ++-- drivers/usb/host/xhci.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 8714ab5bf04d..0a37f0d511cf 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2285,8 +2285,8 @@ xhci_add_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir, writel(erst_size, &ir->ir_set->erst_size); erst_base = xhci_read_64(xhci, &ir->ir_set->erst_base); - erst_base &= ERST_PTR_MASK; - erst_base |= (ir->erst.erst_dma_addr & (u64) ~ERST_PTR_MASK); + erst_base &= ERST_BASE_RSVDP; + erst_base |= ir->erst.erst_dma_addr & ~ERST_BASE_RSVDP; xhci_write_64(xhci, erst_base, &ir->ir_set->erst_base); /* Set the event ring dequeue address of this interrupter */ diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 7e282b4522c0..5df370482521 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -514,7 +514,7 @@ struct xhci_intr_reg { #define ERST_SIZE_MASK (0xffff << 16) /* erst_base bitmasks */ -#define ERST_BASE_RSVDP (0x3f) +#define ERST_BASE_RSVDP (GENMASK_ULL(5, 0)) /* erst_dequeue bitmasks */ /* Dequeue ERST Segment Index (DESI) - Segment number (or alias) -- cgit v1.2.3 From 8679328eb859d06a1984ab48d90ac35d11bbcaf1 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 21 Sep 2023 16:52:33 +0200 Subject: serial: Reduce spinlocked portion of uart_rs485_config() Commit 44b27aec9d96 ("serial: core, 8250: set RS485 termination GPIO in serial core") enabled support for RS485 termination GPIOs behind i2c expanders by setting the GPIO outside of the critical section protected by the port spinlock. Access to the i2c expander may sleep, which caused a splat with the port spinlock held. Commit 7c7f9bc986e6 ("serial: Deassert Transmit Enable on probe in driver-specific way") erroneously regressed that by spinlocking the GPIO manipulation again. Fix by moving uart_rs485_config() (the function manipulating the GPIO) outside of the spinlocked section and acquiring the spinlock inside of uart_rs485_config() for the invocation of ->rs485_config() only. This gets us one step closer to pushing the spinlock down into the ->rs485_config() callbacks which actually need it. (Some callbacks do not want to be spinlocked because they perform sleepable register accesses, see e.g. sc16is7xx_config_rs485().) Stack trace for posterity: Voluntary context switch within RCU read-side critical section! WARNING: CPU: 0 PID: 56 at kernel/rcu/tree_plugin.h:318 rcu_note_context_switch Call trace: rcu_note_context_switch __schedule schedule schedule_timeout wait_for_completion_timeout bcm2835_i2c_xfer __i2c_transfer i2c_transfer i2c_transfer_buffer_flags regmap_i2c_write _regmap_raw_write_impl _regmap_bus_raw_write _regmap_write _regmap_update_bits regmap_update_bits_base pca953x_gpio_set_value gpiod_set_raw_value_commit gpiod_set_value_nocheck gpiod_set_value_cansleep uart_rs485_config uart_add_one_port pl011_register_port pl011_probe Fixes: 7c7f9bc986e6 ("serial: Deassert Transmit Enable on probe in driver-specific way") Suggested-by: Lino Sanfilippo Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v6.1+ Link: https://lore.kernel.org/r/f3a35967c28b32f3c6432d0aa5936e6a9908282d.1695307688.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 7bdc21d5e13b..ca26a8aef2cb 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1404,12 +1404,18 @@ static void uart_set_rs485_termination(struct uart_port *port, static int uart_rs485_config(struct uart_port *port) { struct serial_rs485 *rs485 = &port->rs485; + unsigned long flags; int ret; + if (!(rs485->flags & SER_RS485_ENABLED)) + return 0; + uart_sanitize_serial_rs485(port, rs485); uart_set_rs485_termination(port, rs485); + spin_lock_irqsave(&port->lock, flags); ret = port->rs485_config(port, NULL, rs485); + spin_unlock_irqrestore(&port->lock, flags); if (ret) memset(rs485, 0, sizeof(*rs485)); @@ -2474,11 +2480,10 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport) if (ret == 0) { if (tty) uart_change_line_settings(tty, state, NULL); + uart_rs485_config(uport); spin_lock_irq(&uport->lock); if (!(uport->rs485.flags & SER_RS485_ENABLED)) ops->set_mctrl(uport, uport->mctrl); - else - uart_rs485_config(uport); ops->start_tx(uport); spin_unlock_irq(&uport->lock); tty_port_set_initialized(port, true); @@ -2587,10 +2592,10 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, port->mctrl &= TIOCM_DTR; if (!(port->rs485.flags & SER_RS485_ENABLED)) port->ops->set_mctrl(port, port->mctrl); - else - uart_rs485_config(port); spin_unlock_irqrestore(&port->lock, flags); + uart_rs485_config(port); + /* * If this driver supports console, and it hasn't been * successfully registered yet, try to re-register it. -- cgit v1.2.3 From 560706eff7c8e5621b0d63afe0866e0e1906e87e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 26 Sep 2023 09:13:17 +0300 Subject: serial: 8250_omap: Fix errors with no_console_suspend We now get errors on system suspend if no_console_suspend is set as reported by Thomas. The errors started with commit 20a41a62618d ("serial: 8250_omap: Use force_suspend and resume for system suspend"). Let's fix the issue by checking for console_suspend_enabled in the system suspend and resume path. Note that with this fix the checks for console_suspend_enabled in omap8250_runtime_suspend() become useless. We now keep runtime PM usage count for an attached kernel console starting with commit bedb404e91bb ("serial: 8250_port: Don't use power management for kernel console"). Fixes: 20a41a62618d ("serial: 8250_omap: Use force_suspend and resume for system suspend") Cc: stable Cc: Udit Kumar Reported-by: Thomas Richard Signed-off-by: Tony Lindgren Tested-by: Thomas Richard Reviewed-by: Dhruva Gole Link: https://lore.kernel.org/r/20230926061319.15140-1-tony@atomide.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 26dd089d8e82..ca972fd37725 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -1617,7 +1617,7 @@ static int omap8250_suspend(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); - int err; + int err = 0; serial8250_suspend_port(priv->line); @@ -1627,7 +1627,8 @@ static int omap8250_suspend(struct device *dev) if (!device_may_wakeup(dev)) priv->wer = 0; serial_out(up, UART_OMAP_WER, priv->wer); - err = pm_runtime_force_suspend(dev); + if (uart_console(&up->port) && console_suspend_enabled) + err = pm_runtime_force_suspend(dev); flush_work(&priv->qos_work); return err; @@ -1636,11 +1637,15 @@ static int omap8250_suspend(struct device *dev) static int omap8250_resume(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); + struct uart_8250_port *up = serial8250_get_port(priv->line); int err; - err = pm_runtime_force_resume(dev); - if (err) - return err; + if (uart_console(&up->port) && console_suspend_enabled) { + err = pm_runtime_force_resume(dev); + if (err) + return err; + } + serial8250_resume_port(priv->line); /* Paired with pm_runtime_resume_and_get() in omap8250_suspend() */ pm_runtime_mark_last_busy(dev); @@ -1717,16 +1722,6 @@ static int omap8250_runtime_suspend(struct device *dev) if (priv->line >= 0) up = serial8250_get_port(priv->line); - /* - * When using 'no_console_suspend', the console UART must not be - * suspended. Since driver suspend is managed by runtime suspend, - * preventing runtime suspend (by returning error) will keep device - * active during suspend. - */ - if (priv->is_suspending && !console_suspend_enabled) { - if (up && uart_console(&up->port)) - return -EBUSY; - } if (priv->habit & UART_ERRATA_CLOCK_DISABLE) { int ret; -- cgit v1.2.3 From c0409dd3d151f661e7e57b901a81a02565df163c Mon Sep 17 00:00:00 2001 From: Rex Zhang Date: Sat, 16 Sep 2023 14:06:19 +0800 Subject: dmaengine: idxd: use spin_lock_irqsave before wait_event_lock_irq In idxd_cmd_exec(), wait_event_lock_irq() explicitly calls spin_unlock_irq()/spin_lock_irq(). If the interrupt is on before entering wait_event_lock_irq(), it will become off status after wait_event_lock_irq() is called. Later, wait_for_completion() may go to sleep but irq is disabled. The scenario is warned in might_sleep(). Fix it by using spin_lock_irqsave() instead of the primitive spin_lock() to save the irq status before entering wait_event_lock_irq() and using spin_unlock_irqrestore() instead of the primitive spin_unlock() to restore the irq status before entering wait_for_completion(). Before the change: idxd_cmd_exec() { interrupt is on spin_lock() // interrupt is on wait_event_lock_irq() spin_unlock_irq() // interrupt is enabled ... spin_lock_irq() // interrupt is disabled spin_unlock() // interrupt is still disabled wait_for_completion() // report "BUG: sleeping function // called from invalid context... // in_atomic() irqs_disabled()" } After applying spin_lock_irqsave(): idxd_cmd_exec() { interrupt is on spin_lock_irqsave() // save the on state // interrupt is disabled wait_event_lock_irq() spin_unlock_irq() // interrupt is enabled ... spin_lock_irq() // interrupt is disabled spin_unlock_irqrestore() // interrupt is restored to on wait_for_completion() // No Call trace } Fixes: f9f4082dbc56 ("dmaengine: idxd: remove interrupt disable for cmd_lock") Signed-off-by: Rex Zhang Signed-off-by: Lijun Pan Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Link: https://lore.kernel.org/r/20230916060619.3744220-1-rex.zhang@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 22d6f4e455b7..8f754f922217 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -477,6 +477,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, union idxd_command_reg cmd; DECLARE_COMPLETION_ONSTACK(done); u32 stat; + unsigned long flags; if (idxd_device_is_halted(idxd)) { dev_warn(&idxd->pdev->dev, "Device is HALTED!\n"); @@ -490,7 +491,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, cmd.operand = operand; cmd.int_req = 1; - spin_lock(&idxd->cmd_lock); + spin_lock_irqsave(&idxd->cmd_lock, flags); wait_event_lock_irq(idxd->cmd_waitq, !test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags), idxd->cmd_lock); @@ -507,7 +508,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, * After command submitted, release lock and go to sleep until * the command completes via interrupt. */ - spin_unlock(&idxd->cmd_lock); + spin_unlock_irqrestore(&idxd->cmd_lock, flags); wait_for_completion(&done); stat = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); spin_lock(&idxd->cmd_lock); -- cgit v1.2.3 From 01f1ae2733e2bb4de92fefcea5fda847d92aede1 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Sun, 6 Aug 2023 11:25:11 +0800 Subject: dmaengine: mediatek: Fix deadlock caused by synchronize_irq() The synchronize_irq(c->irq) will not return until the IRQ handler mtk_uart_apdma_irq_handler() is completed. If the synchronize_irq() holds a spin_lock and waits the IRQ handler to complete, but the IRQ handler also needs the same spin_lock. The deadlock will happen. The process is shown below: cpu0 cpu1 mtk_uart_apdma_device_pause() | mtk_uart_apdma_irq_handler() spin_lock_irqsave() | | spin_lock_irqsave() //hold the lock to wait | synchronize_irq() | This patch reorders the synchronize_irq(c->irq) outside the spin_lock in order to mitigate the bug. Fixes: 9135408c3ace ("dmaengine: mediatek: Add MediaTek UART APDMA support") Signed-off-by: Duoming Zhou Reviewed-by: Eugen Hristev Link: https://lore.kernel.org/r/20230806032511.45263-1-duoming@zju.edu.cn Signed-off-by: Vinod Koul --- drivers/dma/mediatek/mtk-uart-apdma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c index c51dc017b48a..06d12ac39144 100644 --- a/drivers/dma/mediatek/mtk-uart-apdma.c +++ b/drivers/dma/mediatek/mtk-uart-apdma.c @@ -450,9 +450,8 @@ static int mtk_uart_apdma_device_pause(struct dma_chan *chan) mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B); mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B); - synchronize_irq(c->irq); - spin_unlock_irqrestore(&c->vc.lock, flags); + synchronize_irq(c->irq); return 0; } -- cgit v1.2.3 From 80f39e1c27ba9e5a1ea7e68e21c569c9d8e46062 Mon Sep 17 00:00:00 2001 From: Szilard Fabian Date: Wed, 4 Oct 2023 05:47:01 -0700 Subject: Input: i8042 - add Fujitsu Lifebook E5411 to i8042 quirk table In the initial boot stage the integrated keyboard of Fujitsu Lifebook E5411 refuses to work and it's not possible to type for example a dm-crypt passphrase without the help of an external keyboard. i8042.nomux kernel parameter resolves this issue but using that a PS/2 mouse is detected. This input device is unused even when the i2c-hid-acpi kernel module is blacklisted making the integrated ELAN touchpad (04F3:308A) not working at all. Since the integrated touchpad is managed by the i2c_designware input driver in the Linux kernel and you can't find a PS/2 mouse port on the computer I think it's safe to not use the PS/2 mouse port at all. Signed-off-by: Szilard Fabian Link: https://lore.kernel.org/r/20231004011749.101789-1-szfabian@bluemarch.art Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 1724d6cb8649..9c39553d30fa 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -618,6 +618,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, .driver_data = (void *)(SERIO_QUIRK_NOMUX) }, + { + /* Fujitsu Lifebook E5411 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU CLIENT COMPUTING LIMITED"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E5411"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX) + }, { /* Gigabyte M912 */ .matches = { -- cgit v1.2.3 From 423622a90abb243944d1517b9f57db53729e45c4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 4 Oct 2023 07:18:31 -0700 Subject: Input: goodix - ensure int GPIO is in input for gpio_count == 1 && gpio_int_idx == 0 case Add a special case for gpio_count == 1 && gpio_int_idx == 0 to goodix_add_acpi_gpio_mappings(). It seems that on newer x86/ACPI devices the reset and irq GPIOs are no longer listed as GPIO resources instead there is only 1 GpioInt resource and _PS0 does the whole reset sequence for us. This means that we must call acpi_device_fix_up_power() on these devices to ensure that the chip is reset before we try to use it. This part was already fixed in commit 3de93e6ed2df ("Input: goodix - call acpi_device_fix_up_power() in some cases") by adding a call to acpi_device_fix_up_power() to the generic "Unexpected ACPI resources" catch all. But it turns out that this case on some hw needs some more special handling. Specifically the firmware may bootup with the IRQ pin in output mode. The reset sequence from ACPI _PS0 (executed by acpi_device_fix_up_power()) should put the pin in input mode, but the GPIO subsystem has cached the direction at bootup, causing request_irq() to fail due to gpiochip_lock_as_irq() failure: [ 9.119864] Goodix-TS i2c-GDIX1002:00: Unexpected ACPI resources: gpio_count 1, gpio_int_idx 0 [ 9.317443] Goodix-TS i2c-GDIX1002:00: ID 911, version: 1060 [ 9.321902] input: Goodix Capacitive TouchScreen as /devices/pci0000:00/0000:00:17.0/i2c_designware.4/i2c-5/i2c-GDIX1002:00/input/input8 [ 9.327840] gpio gpiochip0: (INT3453:00): gpiochip_lock_as_irq: tried to flag a GPIO set as output for IRQ [ 9.327856] gpio gpiochip0: (INT3453:00): unable to lock HW IRQ 26 for IRQ [ 9.327861] genirq: Failed to request resources for GDIX1002:00 (irq 131) on irqchip intel-gpio [ 9.327912] Goodix-TS i2c-GDIX1002:00: request IRQ failed: -5 Fix this by adding a special case for gpio_count == 1 && gpio_int_idx == 0 which adds an ACPI GPIO lookup table for the int GPIO even though we cannot use it for reset purposes (as there is no reset GPIO). Adding the lookup will make the gpiod_int = gpiod_get(..., GPIOD_IN) call succeed, which will explicitly set the direction to input fixing the issue. Note this re-uses the acpi_goodix_int_first_gpios[] lookup table, since there is only 1 GPIO in the ACPI resources the reset entry in that lookup table will amount to a no-op. Reported-and-tested-by: Michael Smith <1973.mjsmith@gmail.com> Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231003215144.69527-1-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index da9954d6df44..af32fbe57b63 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -900,6 +900,25 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts) dev_info(dev, "No ACPI GpioInt resource, assuming that the GPIO order is reset, int\n"); ts->irq_pin_access_method = IRQ_PIN_ACCESS_ACPI_GPIO; gpio_mapping = acpi_goodix_int_last_gpios; + } else if (ts->gpio_count == 1 && ts->gpio_int_idx == 0) { + /* + * On newer devices there is only 1 GpioInt resource and _PS0 + * does the whole reset sequence for us. + */ + acpi_device_fix_up_power(ACPI_COMPANION(dev)); + + /* + * Before the _PS0 call the int GPIO may have been in output + * mode and the call should have put the int GPIO in input mode, + * but the GPIO subsys cached state may still think it is + * in output mode, causing gpiochip_lock_as_irq() failure. + * + * Add a mapping for the int GPIO to make the + * gpiod_int = gpiod_get(..., GPIOD_IN) call succeed, + * which will explicitly set the direction to input. + */ + ts->irq_pin_access_method = IRQ_PIN_ACCESS_NONE; + gpio_mapping = acpi_goodix_int_first_gpios; } else { dev_warn(dev, "Unexpected ACPI resources: gpio_count %d, gpio_int_idx %d\n", ts->gpio_count, ts->gpio_int_idx); -- cgit v1.2.3 From 643445531829d89dc5ddbe0c5ee4ff8f84ce8687 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Wed, 20 Sep 2023 14:07:04 +0800 Subject: workqueue: Fix UAF report by KASAN in pwq_release_workfn() Currently, for UNBOUND wq, if the apply_wqattrs_prepare() return error, the apply_wqattr_cleanup() will be called and use the pwq_release_worker kthread to release resources asynchronously. however, the kfree(wq) is invoked directly in failure path of alloc_workqueue(), if the kfree(wq) has been executed and when the pwq_release_workfn() accesses wq, this leads to the following scenario: BUG: KASAN: slab-use-after-free in pwq_release_workfn+0x339/0x380 kernel/workqueue.c:4124 Read of size 4 at addr ffff888027b831c0 by task pool_workqueue_/3 CPU: 0 PID: 3 Comm: pool_workqueue_ Not tainted 6.5.0-rc7-next-20230825-syzkaller #0 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:364 [inline] print_report+0xc4/0x620 mm/kasan/report.c:475 kasan_report+0xda/0x110 mm/kasan/report.c:588 pwq_release_workfn+0x339/0x380 kernel/workqueue.c:4124 kthread_worker_fn+0x2fc/0xa80 kernel/kthread.c:823 kthread+0x33a/0x430 kernel/kthread.c:388 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 Allocated by task 5054: kasan_save_stack+0x33/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:374 [inline] __kasan_kmalloc+0xa2/0xb0 mm/kasan/common.c:383 kmalloc include/linux/slab.h:599 [inline] kzalloc include/linux/slab.h:720 [inline] alloc_workqueue+0x16f/0x1490 kernel/workqueue.c:4684 kvm_mmu_init_tdp_mmu+0x23/0x100 arch/x86/kvm/mmu/tdp_mmu.c:19 kvm_mmu_init_vm+0x248/0x2e0 arch/x86/kvm/mmu/mmu.c:6180 kvm_arch_init_vm+0x39/0x720 arch/x86/kvm/x86.c:12311 kvm_create_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:1222 [inline] kvm_dev_ioctl_create_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:5089 [inline] kvm_dev_ioctl+0xa31/0x1c20 arch/x86/kvm/../../../virt/kvm/kvm_main.c:5131 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl fs/ioctl.c:857 [inline] __x64_sys_ioctl+0x18f/0x210 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 5054: kasan_save_stack+0x33/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2b/0x40 mm/kasan/generic.c:522 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free+0x15b/0x1b0 mm/kasan/common.c:200 kasan_slab_free include/linux/kasan.h:164 [inline] slab_free_hook mm/slub.c:1800 [inline] slab_free_freelist_hook+0x114/0x1e0 mm/slub.c:1826 slab_free mm/slub.c:3809 [inline] __kmem_cache_free+0xb8/0x2f0 mm/slub.c:3822 alloc_workqueue+0xe76/0x1490 kernel/workqueue.c:4746 kvm_mmu_init_tdp_mmu+0x23/0x100 arch/x86/kvm/mmu/tdp_mmu.c:19 kvm_mmu_init_vm+0x248/0x2e0 arch/x86/kvm/mmu/mmu.c:6180 kvm_arch_init_vm+0x39/0x720 arch/x86/kvm/x86.c:12311 kvm_create_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:1222 [inline] kvm_dev_ioctl_create_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:5089 [inline] kvm_dev_ioctl+0xa31/0x1c20 arch/x86/kvm/../../../virt/kvm/kvm_main.c:5131 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl fs/ioctl.c:857 [inline] __x64_sys_ioctl+0x18f/0x210 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd This commit therefore flush pwq_release_worker in the alloc_and_link_pwqs() before invoke kfree(wq). Reported-by: syzbot+60db9f652c92d5bacba4@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=60db9f652c92d5bacba4 Signed-off-by: Zqiang Signed-off-by: Tejun Heo --- kernel/workqueue.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b9f053a5a5f0..ebe24a5e1435 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4600,6 +4600,12 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) } cpus_read_unlock(); + /* for unbound pwq, flush the pwq_release_worker ensures that the + * pwq_release_workfn() completes before calling kfree(wq). + */ + if (ret) + kthread_flush_worker(pwq_release_worker); + return ret; enomem: -- cgit v1.2.3 From b15e7490a1efd4c0f54434c3a85ced1b6b536b7a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 21 Sep 2023 10:16:41 -0700 Subject: KVM: selftests: Treat %llx like %lx when formatting guest printf Treat %ll* formats the same as %l* formats when processing printfs from the guest so that using e.g. %llx instead of %lx generates the expected output. Ideally, unexpected formats would generate compile-time warnings or errors, but it's not at all obvious how to actually accomplish that. Alternatively, guest_vsnprintf() could assert on an unexpected format, but since the vast majority of printfs are for failed guest asserts, getting *something* printed is better than nothing. E.g. before ==== Test Assertion Failure ==== x86_64/private_mem_conversions_test.c:265: mem[i] == 0 pid=4286 tid=4290 errno=4 - Interrupted system call 1 0x0000000000401c74: __test_mem_conversions at private_mem_conversions_test.c:336 2 0x00007f3aae6076da: ?? ??:0 3 0x00007f3aae32161e: ?? ??:0 Expected 0x0 at offset 0 (gpa 0x%lx), got 0x0 and after ==== Test Assertion Failure ==== x86_64/private_mem_conversions_test.c:265: mem[i] == 0 pid=5664 tid=5668 errno=4 - Interrupted system call 1 0x0000000000401c74: __test_mem_conversions at private_mem_conversions_test.c:336 2 0x00007fbe180076da: ?? ??:0 3 0x00007fbe17d2161e: ?? ??:0 Expected 0x0 at offset 0 (gpa 0x100000000), got 0xcc Fixes: e5119382499c ("KVM: selftests: Add guest_snprintf() to KVM selftests") Cc: Aaron Lewis Link: https://lore.kernel.org/r/20230921171641.3641776-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/guest_sprintf.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c index c4a69d8aeb68..74627514c4d4 100644 --- a/tools/testing/selftests/kvm/lib/guest_sprintf.c +++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c @@ -200,6 +200,13 @@ repeat: ++fmt; } + /* + * Play nice with %llu, %llx, etc. KVM selftests only support + * 64-bit builds, so just treat %ll* the same as %l*. + */ + if (qualifier == 'l' && *fmt == 'l') + ++fmt; + /* default base */ base = 10; -- cgit v1.2.3 From 332c4d90a09c2cdc59f88d6a6c58c1601403b891 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Thu, 14 Sep 2023 17:48:03 +0800 Subject: KVM: selftests: Remove obsolete and incorrect test case metadata Delete inaccurate descriptions and obsolete metadata for test cases. It adds zero value, and has a non-zero chance of becoming stale and misleading in the future. No functional changes intended. Suggested-by: Sean Christopherson Signed-off-by: Like Xu Link: https://lore.kernel.org/r/20230914094803.94661-1-likexu@tencent.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/ucall_common.h | 2 -- tools/testing/selftests/kvm/lib/x86_64/apic.c | 2 -- tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c | 2 -- tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c | 2 -- tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh | 1 - tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c | 4 ---- tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c | 4 ---- 7 files changed, 17 deletions(-) diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index 112bc1da732a..ce33d306c2cb 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/kvm_util.h - * * Copyright (C) 2018, Google LLC. */ #ifndef SELFTEST_KVM_UCALL_COMMON_H diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c index 7168e25c194e..89153a333e83 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/apic.c +++ b/tools/testing/selftests/kvm/lib/x86_64/apic.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/processor.c - * * Copyright (C) 2021, Google LLC. */ diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index e446d76d1c0c..6c1278562090 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * KVM_GET/SET_* tests - * * Copyright (C) 2022, Red Hat, Inc. * * Tests for Hyper-V extensions to SVM. diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 7f36c32fa760..18ac5c1952a3 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/nx_huge_page_test.c - * * Usage: to be run via nx_huge_page_test.sh, which does the necessary * environment setup and teardown * diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh index 0560149e66ed..7cbb409801ee 100755 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh @@ -4,7 +4,6 @@ # Wrapper script which performs setup and cleanup for nx_huge_pages_test. # Makes use of root privileges to set up huge pages and KVM module parameters. # -# tools/testing/selftests/kvm/nx_huge_page_test.sh # Copyright (C) 2022, Google LLC. set -e diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c index 5b669818e39a..59c7304f805e 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c @@ -1,10 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * svm_vmcall_test - * * Copyright © 2021 Amazon.com, Inc. or its affiliates. - * - * Xen shared_info / pvclock testing */ #include "test_util.h" diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 05898ad9f4d9..9ec9ab60b63e 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -1,10 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * svm_vmcall_test - * * Copyright © 2021 Amazon.com, Inc. or its affiliates. - * - * Xen shared_info / pvclock testing */ #include "test_util.h" -- cgit v1.2.3 From 0f28ada1fbf0054557cddcdb93ad17f767105208 Mon Sep 17 00:00:00 2001 From: Jorge Sanjuan Garcia Date: Wed, 6 Sep 2023 11:49:26 +0000 Subject: mcb: remove is_added flag from mcb_device struct When calling mcb_bus_add_devices(), both mcb devices and the mcb bus will attempt to attach a device to a driver because they share the same bus_type. This causes an issue when trying to cast the container of the device to mcb_device struct using to_mcb_device(), leading to a wrong cast when the mcb_bus is added. A crash occurs when freing the ida resources as the bus numbering of mcb_bus gets confused with the is_added flag on the mcb_device struct. The only reason for this cast was to keep an is_added flag on the mcb_device struct that does not seem necessary. The function device_attach() handles already bound devices and the mcb subsystem does nothing special with this is_added flag so remove it completely. Fixes: 18d288198099 ("mcb: Correctly initialize the bus's device") Cc: stable Signed-off-by: Jorge Sanjuan Garcia Co-developed-by: Jose Javier Rodriguez Barbarin Signed-off-by: Jose Javier Rodriguez Barbarin Link: https://lore.kernel.org/r/20230906114901.63174-2-JoseJavier.Rodriguez@duagon.com Signed-off-by: Greg Kroah-Hartman --- drivers/mcb/mcb-core.c | 10 +++------- drivers/mcb/mcb-parse.c | 2 -- include/linux/mcb.h | 1 - 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c index 978fdfc19a06..0cac5bead84f 100644 --- a/drivers/mcb/mcb-core.c +++ b/drivers/mcb/mcb-core.c @@ -387,17 +387,13 @@ EXPORT_SYMBOL_NS_GPL(mcb_free_dev, MCB); static int __mcb_bus_add_devices(struct device *dev, void *data) { - struct mcb_device *mdev = to_mcb_device(dev); int retval; - if (mdev->is_added) - return 0; - retval = device_attach(dev); - if (retval < 0) + if (retval < 0) { dev_err(dev, "Error adding device (%d)\n", retval); - - mdev->is_added = true; + return retval; + } return 0; } diff --git a/drivers/mcb/mcb-parse.c b/drivers/mcb/mcb-parse.c index 2aef990f379f..656b6b71c768 100644 --- a/drivers/mcb/mcb-parse.c +++ b/drivers/mcb/mcb-parse.c @@ -99,8 +99,6 @@ static int chameleon_parse_gdd(struct mcb_bus *bus, mdev->mem.end = mdev->mem.start + size - 1; mdev->mem.flags = IORESOURCE_MEM; - mdev->is_added = false; - ret = mcb_device_register(bus, mdev); if (ret < 0) goto err; diff --git a/include/linux/mcb.h b/include/linux/mcb.h index 1e5893138afe..0b971b24a804 100644 --- a/include/linux/mcb.h +++ b/include/linux/mcb.h @@ -63,7 +63,6 @@ static inline struct mcb_bus *to_mcb_bus(struct device *dev) struct mcb_device { struct device dev; struct mcb_bus *bus; - bool is_added; struct mcb_driver *driver; u16 id; int inst; -- cgit v1.2.3 From b83ce9cb4a465b8f9a3fa45561b721a9551f60e3 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 8 Sep 2023 10:27:23 +0200 Subject: dma-buf: add dma_fence_timestamp helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a fence signals there is a very small race window where the timestamp isn't updated yet. sync_file solves this by busy waiting for the timestamp to appear, but on other ocassions didn't handled this correctly. Provide a dma_fence_timestamp() helper function for this and use it in all appropriate cases. Another alternative would be to grab the spinlock when that happens. v2 by teddy: add a wait parameter to wait for the timestamp to show up, in case the accurate timestamp is needed and/or the timestamp is not based on ktime (e.g. hw timestamp) v3 chk: drop the parameter again for unified handling Signed-off-by: Yunxiang Li Signed-off-by: Christian König Fixes: 1774baa64f93 ("drm/scheduler: Change scheduled fence track v2") Reviewed-by: Alex Deucher CC: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20230929104725.2358-1-christian.koenig@amd.com --- drivers/dma-buf/dma-fence-unwrap.c | 13 ++++--------- drivers/dma-buf/sync_file.c | 9 +++------ drivers/gpu/drm/scheduler/sched_main.c | 2 +- include/linux/dma-fence.h | 19 +++++++++++++++++++ 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index c625bb2b5d56..628af51c81af 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -76,16 +76,11 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) { if (!dma_fence_is_signaled(tmp)) { ++count; - } else if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, - &tmp->flags)) { - if (ktime_after(tmp->timestamp, timestamp)) - timestamp = tmp->timestamp; } else { - /* - * Use the current time if the fence is - * currently signaling. - */ - timestamp = ktime_get(); + ktime_t t = dma_fence_timestamp(tmp); + + if (ktime_after(t, timestamp)) + timestamp = t; } } } diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index af57799c86ce..2e9a316c596a 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -268,13 +268,10 @@ static int sync_fill_fence_info(struct dma_fence *fence, sizeof(info->driver_name)); info->status = dma_fence_get_status(fence); - while (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && - !test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) - cpu_relax(); info->timestamp_ns = - test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags) ? - ktime_to_ns(fence->timestamp) : - ktime_set(0, 0); + dma_fence_is_signaled(fence) ? + ktime_to_ns(dma_fence_timestamp(fence)) : + ktime_set(0, 0); return info->status; } diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 506371c42745..5a3a622fc672 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -929,7 +929,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) if (next) { next->s_fence->scheduled.timestamp = - job->s_fence->finished.timestamp; + dma_fence_timestamp(&job->s_fence->finished); /* start TO timer for next job */ drm_sched_start_timeout(sched); } diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 0d678e9a7b24..ebe78bd3d121 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -568,6 +568,25 @@ static inline void dma_fence_set_error(struct dma_fence *fence, fence->error = error; } +/** + * dma_fence_timestamp - helper to get the completion timestamp of a fence + * @fence: fence to get the timestamp from. + * + * After a fence is signaled the timestamp is updated with the signaling time, + * but setting the timestamp can race with tasks waiting for the signaling. This + * helper busy waits for the correct timestamp to appear. + */ +static inline ktime_t dma_fence_timestamp(struct dma_fence *fence) +{ + if (WARN_ON(!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))) + return ktime_get(); + + while (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) + cpu_relax(); + + return fence->timestamp; +} + signed long dma_fence_wait_timeout(struct dma_fence *, bool intr, signed long timeout); signed long dma_fence_wait_any_timeout(struct dma_fence **fences, -- cgit v1.2.3 From 39fef15b5f2db46619393f9fd20fdd26a2ff49ef Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 3 Oct 2023 17:50:03 -0700 Subject: Documentation: embargoed-hardware-issues.rst: Clarify prenotifaction There has been a repeated misunderstanding about what the hardware embargo list is for. Clarify the language in the process so that it is clear that only fixes are coordinated. There is explicitly no prenotification process. The list members are also expected to keep total radio silence during embargoes. Cc: Thomas Gleixner Cc: workflows@vger.kernel.org Cc: linux-doc@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20231004004959.work.258-kees@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index ac7c52f130c9..31000f075707 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -25,15 +25,15 @@ Contact The Linux kernel hardware security team is separate from the regular Linux kernel security team. -The team only handles the coordination of embargoed hardware security -issues. Reports of pure software security bugs in the Linux kernel are not +The team only handles developing fixes for embargoed hardware security +issues. Reports of pure software security bugs in the Linux kernel are not handled by this team and the reporter will be guided to contact the regular Linux kernel security team (:ref:`Documentation/admin-guide/ `) instead. The team can be contacted by email at . This -is a private list of security officers who will help you to coordinate an -issue according to our documented process. +is a private list of security officers who will help you to coordinate a +fix according to our documented process. The list is encrypted and email to the list can be sent by either PGP or S/MIME encrypted and must be signed with the reporter's PGP key or S/MIME @@ -132,11 +132,11 @@ other hardware could be affected. The hardware security team will provide an incident-specific encrypted mailing-list which will be used for initial discussion with the reporter, -further disclosure and coordination. +further disclosure, and coordination of fixes. The hardware security team will provide the disclosing party a list of developers (domain experts) who should be informed initially about the -issue after confirming with the developers that they will adhere to this +issue after confirming with the developers that they will adhere to this Memorandum of Understanding and the documented process. These developers form the initial response team and will be responsible for handling the issue after initial contact. The hardware security team is supporting the @@ -209,13 +209,18 @@ five work days this is taken as silent acknowledgement. After acknowledgement or resolution of an objection the expert is disclosed by the incident team and brought into the development process. +List participants may not communicate about the issue outside of the +private mailing list. List participants may not use any shared resources +(e.g. employer build farms, CI systems, etc) when working on patches. + Coordinated release """"""""""""""""""" The involved parties will negotiate the date and time where the embargo ends. At that point the prepared mitigations are integrated into the -relevant kernel trees and published. +relevant kernel trees and published. There is no pre-notification process: +fixes are published in public and available to everyone at the same time. While we understand that hardware security issues need coordinated embargo time, the embargo time should be constrained to the minimum time which is -- cgit v1.2.3 From 53604854c6f00ce1db4393499125aff94e3d9bbd Mon Sep 17 00:00:00 2001 From: Russ Weight Date: Fri, 29 Sep 2023 08:13:26 -0700 Subject: firmware_loader: Update contact emails for ABI docs Update the firmware_loader documentation and corresponding section in the MAINTAINERs file with a new email address. Signed-off-by: Russ Weight Link: https://lore.kernel.org/r/20230929151326.311959-1-russell.h.weight@intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-class-firmware | 14 +++++++------- MAINTAINERS | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-firmware b/Documentation/ABI/testing/sysfs-class-firmware index 978d3d500400..fba87a55f3ca 100644 --- a/Documentation/ABI/testing/sysfs-class-firmware +++ b/Documentation/ABI/testing/sysfs-class-firmware @@ -1,7 +1,7 @@ What: /sys/class/firmware/.../data Date: July 2022 KernelVersion: 5.19 -Contact: Russ Weight +Contact: Russ Weight Description: The data sysfs file is used for firmware-fallback and for firmware uploads. Cat a firmware image to this sysfs file after you echo 1 to the loading sysfs file. When the firmware @@ -13,7 +13,7 @@ Description: The data sysfs file is used for firmware-fallback and for What: /sys/class/firmware/.../cancel Date: July 2022 KernelVersion: 5.19 -Contact: Russ Weight +Contact: Russ Weight Description: Write-only. For firmware uploads, write a "1" to this file to request that the transfer of firmware data to the lower-level device be canceled. This request will be rejected (EBUSY) if @@ -23,7 +23,7 @@ Description: Write-only. For firmware uploads, write a "1" to this file to What: /sys/class/firmware/.../error Date: July 2022 KernelVersion: 5.19 -Contact: Russ Weight +Contact: Russ Weight Description: Read-only. Returns a string describing a failed firmware upload. This string will be in the form of :, where will be one of the status strings described @@ -37,7 +37,7 @@ Description: Read-only. Returns a string describing a failed firmware What: /sys/class/firmware/.../loading Date: July 2022 KernelVersion: 5.19 -Contact: Russ Weight +Contact: Russ Weight Description: The loading sysfs file is used for both firmware-fallback and for firmware uploads. Echo 1 onto the loading file to indicate you are writing a firmware file to the data sysfs node. Echo @@ -49,7 +49,7 @@ Description: The loading sysfs file is used for both firmware-fallback and What: /sys/class/firmware/.../remaining_size Date: July 2022 KernelVersion: 5.19 -Contact: Russ Weight +Contact: Russ Weight Description: Read-only. For firmware upload, this file contains the size of the firmware data that remains to be transferred to the lower-level device driver. The size value is initialized to @@ -62,7 +62,7 @@ Description: Read-only. For firmware upload, this file contains the size What: /sys/class/firmware/.../status Date: July 2022 KernelVersion: 5.19 -Contact: Russ Weight +Contact: Russ Weight Description: Read-only. Returns a string describing the current status of a firmware upload. The string will be one of the following: idle, "receiving", "preparing", "transferring", "programming". @@ -70,7 +70,7 @@ Description: Read-only. Returns a string describing the current status of What: /sys/class/firmware/.../timeout Date: July 2022 KernelVersion: 5.19 -Contact: Russ Weight +Contact: Russ Weight Description: This file supports the timeout mechanism for firmware fallback. This file has no affect on firmware uploads. For more information on timeouts please see the documentation diff --git a/MAINTAINERS b/MAINTAINERS index 35977b269d5e..3fcbd6b702a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8093,7 +8093,7 @@ F: include/linux/arm_ffa.h FIRMWARE LOADER (request_firmware) M: Luis Chamberlain -M: Russ Weight +M: Russ Weight L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/firmware_class/ -- cgit v1.2.3 From 1aa3aaf8953c84bad398adf6c3cabc9d6685bf7d Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Fri, 22 Sep 2023 17:51:37 +0000 Subject: binder: fix memory leaks of spam and pending work A transaction complete work is allocated and queued for each transaction. Under certain conditions the work->type might be marked as BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT to notify userspace about potential spamming threads or as BINDER_WORK_TRANSACTION_PENDING when the target is currently frozen. However, these work types are not being handled in binder_release_work() so they will leak during a cleanup. This was reported by syzkaller with the following kmemleak dump: BUG: memory leak unreferenced object 0xffff88810e2d6de0 (size 32): comm "syz-executor338", pid 5046, jiffies 4294968230 (age 13.590s) hex dump (first 32 bytes): e0 6d 2d 0e 81 88 ff ff e0 6d 2d 0e 81 88 ff ff .m-......m-..... 04 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmalloc_trace+0x25/0x90 mm/slab_common.c:1114 [] kmalloc include/linux/slab.h:599 [inline] [] kzalloc include/linux/slab.h:720 [inline] [] binder_transaction+0x573/0x4050 drivers/android/binder.c:3152 [] binder_thread_write+0x6b5/0x1860 drivers/android/binder.c:4010 [] binder_ioctl_write_read drivers/android/binder.c:5066 [inline] [] binder_ioctl+0x1b2c/0x3cf0 drivers/android/binder.c:5352 [] vfs_ioctl fs/ioctl.c:51 [inline] [] __do_sys_ioctl fs/ioctl.c:871 [inline] [] __se_sys_ioctl fs/ioctl.c:857 [inline] [] __x64_sys_ioctl+0xf2/0x140 fs/ioctl.c:857 [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fix the leaks by kfreeing these work types in binder_release_work() and handle them as a BINDER_WORK_TRANSACTION_COMPLETE cleanup. Cc: stable@vger.kernel.org Fixes: 0567461a7a6e ("binder: return pending info for frozen async txns") Fixes: a7dc1e6f99df ("binder: tell userspace to dump current backtrace when detected oneway spamming") Reported-by: syzbot+7f10c1653e35933c0f1e@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7f10c1653e35933c0f1e Suggested-by: Alice Ryhl Signed-off-by: Carlos Llamas Reviewed-by: Alice Ryhl Acked-by: Todd Kjos Link: https://lore.kernel.org/r/20230922175138.230331-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 367afac5f1bf..92128aae2d06 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -4812,6 +4812,8 @@ static void binder_release_work(struct binder_proc *proc, "undelivered TRANSACTION_ERROR: %u\n", e->cmd); } break; + case BINDER_WORK_TRANSACTION_PENDING: + case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: case BINDER_WORK_TRANSACTION_COMPLETE: { binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, "undelivered TRANSACTION_COMPLETE\n"); -- cgit v1.2.3 From c6fd91276d64039d068f5ceaf7ad335475bb0389 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 8 Aug 2023 08:32:23 +0200 Subject: dt-bindings: iio: rohm,bu27010: add missing vdd-supply to example Bindings require vdd-supply but the example DTS was missing one. This fixes dt_binding_check error: Documentation/devicetree/bindings/iio/light/rohm,bu27010.example.dtb: light-sensor@38: 'vdd-supply' is a required property Fixes: ce2a8c160066 ("dt-bindings: iio: ROHM BU27010 RGBC + flickering sensor") Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Reviewed-by: Matti Vaittinen Link: https://lore.kernel.org/r/20230808063223.80431-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/light/rohm,bu27010.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iio/light/rohm,bu27010.yaml b/Documentation/devicetree/bindings/iio/light/rohm,bu27010.yaml index 8376d64a641a..bed42d5d0d94 100644 --- a/Documentation/devicetree/bindings/iio/light/rohm,bu27010.yaml +++ b/Documentation/devicetree/bindings/iio/light/rohm,bu27010.yaml @@ -45,5 +45,6 @@ examples: light-sensor@38 { compatible = "rohm,bu27010"; reg = <0x38>; + vdd-supply = <&vdd>; }; }; -- cgit v1.2.3 From ea191d0fd361fb569b6c3d19e5410510aa6b6bac Mon Sep 17 00:00:00 2001 From: "GONG, Ruiqi" Date: Thu, 10 Aug 2023 11:59:10 +0800 Subject: iio: irsd200: fix -Warray-bounds bug in irsd200_trigger_handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling with gcc 13 with -Warray-bounds enabled: In file included from drivers/iio/proximity/irsd200.c:15: In function ‘iio_push_to_buffers_with_timestamp’, inlined from ‘irsd200_trigger_handler’ at drivers/iio/proximity/irsd200.c:770:2: ./include/linux/iio/buffer.h:42:46: error: array subscript ‘int64_t {aka long long int}[0]’ is partly outside array bounds of ‘s16[1]’ {aka ‘short int[1]’} [-Werror=array-bounds=] 42 | ((int64_t *)data)[ts_offset] = timestamp; | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~ drivers/iio/proximity/irsd200.c: In function ‘irsd200_trigger_handler’: drivers/iio/proximity/irsd200.c:763:13: note: object ‘buf’ of size 2 763 | s16 buf = 0; | ^~~ The problem seems to be that irsd200_trigger_handler() is taking a s16 variable as an int64_t buffer. As Jonathan suggested [1], fix it by extending the buffer to a two-element array of s64. Link: https://github.com/KSPP/linux/issues/331 Link: https://lore.kernel.org/lkml/20230809181329.46c00a5d@jic23-huawei/ [1] Fixes: 3db3562bc66e ("iio: Add driver for Murata IRS-D200") Signed-off-by: GONG, Ruiqi Acked-by: Gustavo A. R. Silva Reviewed-by: Waqar Hameed Tested-by: Waqar Hameed Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20230810035910.1334706-1-gongruiqi@huaweicloud.com Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/irsd200.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/proximity/irsd200.c b/drivers/iio/proximity/irsd200.c index 5bd791b46d98..bdff91f6b1a3 100644 --- a/drivers/iio/proximity/irsd200.c +++ b/drivers/iio/proximity/irsd200.c @@ -759,14 +759,14 @@ static irqreturn_t irsd200_trigger_handler(int irq, void *pollf) { struct iio_dev *indio_dev = ((struct iio_poll_func *)pollf)->indio_dev; struct irsd200_data *data = iio_priv(indio_dev); - s16 buf = 0; + s64 buf[2] = {}; int ret; - ret = irsd200_read_data(data, &buf); + ret = irsd200_read_data(data, (s16 *)buf); if (ret) goto end; - iio_push_to_buffers_with_timestamp(indio_dev, &buf, + iio_push_to_buffers_with_timestamp(indio_dev, buf, iio_get_time_ns(indio_dev)); end: -- cgit v1.2.3 From 7771c8c80d62ad065637ef74ed2962983f6c5f6d Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Tue, 29 Aug 2023 11:06:22 +0800 Subject: iio: cros_ec: fix an use-after-free in cros_ec_sensors_push_data() cros_ec_sensors_push_data() reads `indio_dev->active_scan_mask` and calls iio_push_to_buffers_with_timestamp() without making sure the `indio_dev` stays in buffer mode. There is a race if `indio_dev` exits buffer mode right before cros_ec_sensors_push_data() accesses them. An use-after-free on `indio_dev->active_scan_mask` was observed. The call trace: [...] _find_next_bit cros_ec_sensors_push_data cros_ec_sensorhub_event blocking_notifier_call_chain cros_ec_irq_thread It was caused by a race condition: one thread just freed `active_scan_mask` at [1]; while another thread tried to access the memory at [2]. Fix it by calling iio_device_claim_buffer_mode() to ensure the `indio_dev` can't exit buffer mode during cros_ec_sensors_push_data(). [1]: https://elixir.bootlin.com/linux/v6.5/source/drivers/iio/industrialio-buffer.c#L1189 [2]: https://elixir.bootlin.com/linux/v6.5/source/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c#L198 Cc: stable@vger.kernel.org Fixes: aa984f1ba4a4 ("iio: cros_ec: Register to cros_ec_sensorhub when EC supports FIFO") Signed-off-by: Tzung-Bi Shih Reviewed-by: Guenter Roeck Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20230829030622.1571852-1-tzungbi@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c index b72d39fc2434..6bfe5d6847e7 100644 --- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c @@ -190,8 +190,11 @@ int cros_ec_sensors_push_data(struct iio_dev *indio_dev, /* * Ignore samples if the buffer is not set: it is needed if the ODR is * set but the buffer is not enabled yet. + * + * Note: iio_device_claim_buffer_mode() returns -EBUSY if the buffer + * is not enabled. */ - if (!iio_buffer_enabled(indio_dev)) + if (iio_device_claim_buffer_mode(indio_dev) < 0) return 0; out = (s16 *)st->samples; @@ -210,6 +213,7 @@ int cros_ec_sensors_push_data(struct iio_dev *indio_dev, iio_push_to_buffers_with_timestamp(indio_dev, st->samples, timestamp + delta); + iio_device_release_buffer_mode(indio_dev); return 0; } EXPORT_SYMBOL_GPL(cros_ec_sensors_push_data); -- cgit v1.2.3 From 850101b3598277794f92a9e363a60a66e0d42890 Mon Sep 17 00:00:00 2001 From: Philipp Rossak Date: Tue, 5 Sep 2023 00:02:04 +0200 Subject: iio: adc: imx8qxp: Fix address for command buffer registers The ADC Command Buffer Register high and low are currently pointing to the wrong address and makes it impossible to perform correct ADC measurements over all channels. According to the datasheet of the imx8qxp the ADC_CMDL register starts at address 0x100 and the ADC_CMDH register starts at address 0x104. This bug seems to be in the kernel since the introduction of this driver. This can be observed by checking all raw voltages of the adc and they are all nearly identical: cat /sys/bus/iio/devices/iio\:device0/in_voltage*_raw 3498 3494 3491 3491 3489 3490 3490 3490 Fixes: 1e23dcaa1a9fa ("iio: imx8qxp-adc: Add driver support for NXP IMX8QXP ADC") Signed-off-by: Philipp Rossak Acked-by: Haibo Chen Link: https://lore.kernel.org/r/20230904220204.23841-1-embed3d@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/imx8qxp-adc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/imx8qxp-adc.c b/drivers/iio/adc/imx8qxp-adc.c index f5a0fc9e64c5..fff6e5a2d956 100644 --- a/drivers/iio/adc/imx8qxp-adc.c +++ b/drivers/iio/adc/imx8qxp-adc.c @@ -38,8 +38,8 @@ #define IMX8QXP_ADR_ADC_FCTRL 0x30 #define IMX8QXP_ADR_ADC_SWTRIG 0x34 #define IMX8QXP_ADR_ADC_TCTRL(tid) (0xc0 + (tid) * 4) -#define IMX8QXP_ADR_ADC_CMDH(cid) (0x100 + (cid) * 8) -#define IMX8QXP_ADR_ADC_CMDL(cid) (0x104 + (cid) * 8) +#define IMX8QXP_ADR_ADC_CMDL(cid) (0x100 + (cid) * 8) +#define IMX8QXP_ADR_ADC_CMDH(cid) (0x104 + (cid) * 8) #define IMX8QXP_ADR_ADC_RESFIFO 0x300 #define IMX8QXP_ADR_ADC_TST 0xffc -- cgit v1.2.3 From c9b9cfe7d342683f624a89c3b617be18aff879e8 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 3 Sep 2023 12:30:52 +0100 Subject: iio: imu: bno055: Fix missing Kconfig dependencies This driver uses IIO triggered buffers so it needs to select them in Kconfig. on riscv-32bit: /opt/crosstool/gcc-13.2.0-nolibc/riscv32-linux/bin/riscv32-linux-ld: drivers/iio/imu/bno055/bno055.o: in function `.L367': bno055.c:(.text+0x2c96): undefined reference to `devm_iio_triggered_buffer_setup_ext' Reported-by: Randy Dunlap Closes: https://lore.kernel.org/linux-next/40566b4b-3950-81fe-ff14-871d8c447627@infradead.org/ Fixes: 4aefe1c2bd0c ("iio: imu: add Bosch Sensortec BNO055 core driver") Cc: Andrea Merello Acked-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://lore.kernel.org/r/20230903113052.846298-1-jic23@kernel.org Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/bno055/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/imu/bno055/Kconfig b/drivers/iio/imu/bno055/Kconfig index fa79b1ac4f85..83e53acfbe88 100644 --- a/drivers/iio/imu/bno055/Kconfig +++ b/drivers/iio/imu/bno055/Kconfig @@ -2,6 +2,8 @@ config BOSCH_BNO055 tristate + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER config BOSCH_BNO055_SERIAL tristate "Bosch BNO055 attached via UART" -- cgit v1.2.3 From 901a293fd96fb9bab843ba4cc7be3094a5aa7c94 Mon Sep 17 00:00:00 2001 From: Lakshmi Yadlapati Date: Tue, 29 Aug 2023 13:02:22 -0500 Subject: iio: pressure: dps310: Adjust Timeout Settings The DPS310 sensor chip has been encountering intermittent errors while reading the sensor device across various system designs. This issue causes the chip to become "stuck," preventing the indication of "ready" status for pressure and temperature measurements in the MEAS_CFG register. To address this issue, this commit fixes the timeout settings to improve sensor stability: - After sending a reset command to the chip, the timeout has been extended from 2.5 ms to 15 ms, aligning with the DPS310 specification. - The read timeout value of the MEAS_CFG register has been adjusted from 20ms to 30ms to match the specification. Signed-off-by: Lakshmi Yadlapati Fixes: 7b4ab4abcea4 ("iio: pressure: dps310: Reset chip after timeout") Link: https://lore.kernel.org/r/20230829180222.3431926-2-lakshmiy@us.ibm.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/dps310.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iio/pressure/dps310.c b/drivers/iio/pressure/dps310.c index b10dbf5cf494..1ff091b2f764 100644 --- a/drivers/iio/pressure/dps310.c +++ b/drivers/iio/pressure/dps310.c @@ -57,8 +57,8 @@ #define DPS310_RESET_MAGIC 0x09 #define DPS310_COEF_BASE 0x10 -/* Make sure sleep time is <= 20ms for usleep_range */ -#define DPS310_POLL_SLEEP_US(t) min(20000, (t) / 8) +/* Make sure sleep time is <= 30ms for usleep_range */ +#define DPS310_POLL_SLEEP_US(t) min(30000, (t) / 8) /* Silently handle error in rate value here */ #define DPS310_POLL_TIMEOUT_US(rc) ((rc) <= 0 ? 1000000 : 1000000 / (rc)) @@ -402,8 +402,8 @@ static int dps310_reset_wait(struct dps310_data *data) if (rc) return rc; - /* Wait for device chip access: 2.5ms in specification */ - usleep_range(2500, 12000); + /* Wait for device chip access: 15ms in specification */ + usleep_range(15000, 55000); return 0; } -- cgit v1.2.3 From b120dd3a15582fb7a959cecb05e4d9814fcba386 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Tue, 12 Sep 2023 11:54:21 +0300 Subject: iio: addac: Kconfig: update ad74413r selections Building ad74413r without selecting IIO_BUFFER and IIO_TRIGGERED_BUFFER generates error with respect to the iio trigger functions that are used within the driver. Update the Kconfig accordingly. Fixes: fea251b6a5db ("iio: addac: add AD74413R driver") Signed-off-by: Antoniu Miclaus Link: https://lore.kernel.org/r/20230912085421.51102-1-antoniu.miclaus@analog.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/addac/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/addac/Kconfig b/drivers/iio/addac/Kconfig index 877f9124803c..397544f23b85 100644 --- a/drivers/iio/addac/Kconfig +++ b/drivers/iio/addac/Kconfig @@ -24,6 +24,8 @@ config AD74413R depends on GPIOLIB && SPI select REGMAP_SPI select CRC8 + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say yes here to build support for Analog Devices AD74412R/AD74413R quad-channel software configurable input/output solution. -- cgit v1.2.3 From 7e87ab38eed09c9dec56da361d74158159ae84a3 Mon Sep 17 00:00:00 2001 From: Mårten Lindahl Date: Mon, 11 Sep 2023 14:43:01 +0200 Subject: iio: light: vcnl4000: Don't power on/off chip in config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After enabling/disabling interrupts on the vcnl4040 chip the als and/or ps sensor is powered on or off depending on the interrupt enable bits. This is made as a last step in write_event_config. But there is no reason to do this as the runtime PM handles the power state of the sensors. Interfering with this may impact sensor readings. Consider the following: 1. Userspace makes sensor data reading which triggers RPM resume (sensor powered on) and a RPM suspend timeout. The timeout is 2000ms before RPM suspend powers the sensor off if no new reading is made within the timeout period. 2. Userspace disables interrupts => powers sensor off 3. Userspace reads sensor data = 0 because sensor is off and the suspend timeout has not passed. For each new reading made within the timeout period the timeout is renewed with 2000ms and RPM will not make a new resume (device was not suspended). So the sensor will not be powered on. 4. No further userspace reading for 2000ms ends RPM suspend timeout and triggers suspend (powers off already powered off sensor). Powering sensor off in (2) makes all consecutive readings made within 2000ms to the previous reading (3) return invalid data. Skip setting power state when writing new event config. Fixes: 546676121cb9 ("iio: light: vcnl4000: Add interrupt support for vcnl4040") Fixes: bc292aaf9cb4 ("iio: light: vcnl4000: add illuminance irq vcnl4040/4200") Signed-off-by: Mårten Lindahl Link: https://lore.kernel.org/r/20230907-vcnl4000-pm-fix-v2-1-298e01f54db4@axis.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/vcnl4000.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c index 3a52b09c2823..fdf763a04b0b 100644 --- a/drivers/iio/light/vcnl4000.c +++ b/drivers/iio/light/vcnl4000.c @@ -1513,7 +1513,6 @@ static int vcnl4040_write_event_config(struct iio_dev *indio_dev, out: mutex_unlock(&data->vcnl4000_lock); - data->chip_spec->set_power_state(data, data->ps_int || data->als_int); return ret; } -- cgit v1.2.3 From 7e7dcab620cd6d34939f615cac63fc0ef7e81c72 Mon Sep 17 00:00:00 2001 From: Alisa-Dariana Roman Date: Sun, 24 Sep 2023 18:21:48 +0300 Subject: iio: adc: ad7192: Correct reference voltage The avdd and the reference voltage are two different sources but the reference voltage was assigned according to the avdd supply. Add vref regulator structure and set the reference voltage according to the vref supply from the devicetree. In case vref supply is missing, reference voltage is set according to the avdd supply for compatibility with old devicetrees. Fixes: b581f748cce0 ("staging: iio: adc: ad7192: move out of staging") Signed-off-by: Alisa-Dariana Roman Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230924152149.41884-1-alisadariana@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7192.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index 69d1103b9508..b64fd365f83f 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -177,6 +177,7 @@ struct ad7192_chip_info { struct ad7192_state { const struct ad7192_chip_info *chip_info; struct regulator *avdd; + struct regulator *vref; struct clk *mclk; u16 int_vref_mv; u32 fclk; @@ -1008,10 +1009,30 @@ static int ad7192_probe(struct spi_device *spi) if (ret) return dev_err_probe(&spi->dev, ret, "Failed to enable specified DVdd supply\n"); - ret = regulator_get_voltage(st->avdd); - if (ret < 0) { - dev_err(&spi->dev, "Device tree error, reference voltage undefined\n"); - return ret; + st->vref = devm_regulator_get_optional(&spi->dev, "vref"); + if (IS_ERR(st->vref)) { + if (PTR_ERR(st->vref) != -ENODEV) + return PTR_ERR(st->vref); + + ret = regulator_get_voltage(st->avdd); + if (ret < 0) + return dev_err_probe(&spi->dev, ret, + "Device tree error, AVdd voltage undefined\n"); + } else { + ret = regulator_enable(st->vref); + if (ret) { + dev_err(&spi->dev, "Failed to enable specified Vref supply\n"); + return ret; + } + + ret = devm_add_action_or_reset(&spi->dev, ad7192_reg_disable, st->vref); + if (ret) + return ret; + + ret = regulator_get_voltage(st->vref); + if (ret < 0) + return dev_err_probe(&spi->dev, ret, + "Device tree error, Vref voltage undefined\n"); } st->int_vref_mv = ret / 1000; -- cgit v1.2.3 From 87b9a0e3ff31d857ed1d13637898c472964aab7d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 26 Sep 2023 11:43:50 -0500 Subject: dt-bindings: iio: adc: adi,ad7292: Fix additionalProperties on channel nodes "additionalProperties: true" is only for incomplete schemas such as bus child nodes in a bus's schema. That doesn't apply to the "channel" nodes in the adi,ad7292 binding, so fix additionalProperties to be false. Signed-off-by: Rob Herring Acked-by: Conor Dooley Acked-by: Marcelo Schmitt Link: https://lore.kernel.org/r/20230926164357.100325-1-robh@kernel.org Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml index 7cc4ddc4e9b7..2aa1f4b063eb 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml @@ -61,7 +61,7 @@ patternProperties: required: - reg - additionalProperties: true + additionalProperties: false allOf: - $ref: /schemas/spi/spi-peripheral-props.yaml# -- cgit v1.2.3 From fd39d9668f2ce9f4b05ad55e8c8d80c098073e0b Mon Sep 17 00:00:00 2001 From: Alexander Zangerl Date: Wed, 20 Sep 2023 10:01:10 +1000 Subject: iio: pressure: ms5611: ms5611_prom_is_valid false negative bug The ms5611 driver falsely rejects lots of MS5607-02BA03-50 chips with "PROM integrity check failed" because it doesn't accept a prom crc value of zero as legitimate. According to the datasheet for this chip (and the manufacturer's application note about the PROM CRC), none of the possible values for the CRC are excluded - but the current code in ms5611_prom_is_valid() ends with return crc_orig != 0x0000 && crc == crc_orig Discussed with the driver author (Tomasz Duszynski) and he indicated that at that time (2015) he was dealing with some faulty chip samples which returned blank data under some circumstances and/or followed example code which indicated CRC zero being bad. As far as I can tell this exception should not be applied anymore; We've got a few hundred custom boards here with this chip where large numbers of the prom have a legitimate CRC value 0, and do work fine, but which the current driver code wrongly rejects. Signed-off-by: Alexander Zangerl Fixes: c0644160a8b5 ("iio: pressure: add support for MS5611 pressure and temperature sensor") Link: https://lore.kernel.org/r/2535-1695168070.831792@Ze3y.dhYT.s3fx Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/ms5611_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/pressure/ms5611_core.c b/drivers/iio/pressure/ms5611_core.c index 627497e61a63..2fc706f9d8ae 100644 --- a/drivers/iio/pressure/ms5611_core.c +++ b/drivers/iio/pressure/ms5611_core.c @@ -76,7 +76,7 @@ static bool ms5611_prom_is_valid(u16 *prom, size_t len) crc = (crc >> 12) & 0x000F; - return crc_orig != 0x0000 && crc == crc_orig; + return crc == crc_orig; } static int ms5611_read_prom(struct iio_dev *indio_dev) -- cgit v1.2.3 From 10f20628c9b8e924b8046e63b36b2cea4d2c85e4 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Mon, 11 Sep 2023 15:16:26 -0700 Subject: drm/msm/dpu: fail dpu_plane_atomic_check() based on mdp clk limits Currently, dpu_plane_atomic_check() does not check whether the plane can process the image without exceeding the per chipset limits for MDP clock. This leads to underflow issues because the SSPP is not able to complete the processing for the data rate of the display. Fail the dpu_plane_atomic_check() if the SSPP cannot process the image without exceeding the MDP clock limits. changes in v2: - use crtc_state's adjusted_mode instead of mode Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/556819/ Link: https://lore.kernel.org/r/20230911221627.9569-1-quic_abhinavk@quicinc.com --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 98c1b22e9bca..0be195f9149c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -733,9 +733,11 @@ static int dpu_plane_check_inline_rotation(struct dpu_plane *pdpu, static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, struct dpu_sw_pipe *pipe, struct dpu_sw_pipe_cfg *pipe_cfg, - const struct dpu_format *fmt) + const struct dpu_format *fmt, + const struct drm_display_mode *mode) { uint32_t min_src_size; + struct dpu_kms *kms = _dpu_plane_get_kms(&pdpu->base); min_src_size = DPU_FORMAT_IS_YUV(fmt) ? 2 : 1; @@ -774,6 +776,12 @@ static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, return -EINVAL; } + /* max clk check */ + if (_dpu_plane_calc_clk(mode, pipe_cfg) > kms->perf.max_core_clk_rate) { + DPU_DEBUG_PLANE(pdpu, "plane exceeds max mdp core clk limits\n"); + return -E2BIG; + } + return 0; } @@ -899,12 +907,13 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, r_pipe_cfg->dst_rect.x1 = pipe_cfg->dst_rect.x2; } - ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, fmt); + ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, fmt, &crtc_state->adjusted_mode); if (ret) return ret; if (r_pipe->sspp) { - ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, fmt); + ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, fmt, + &crtc_state->adjusted_mode); if (ret) return ret; } -- cgit v1.2.3 From 6313e096dbfaf1377ba8f5f8ccd720cc36c576c6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 4 Oct 2023 17:29:54 -0700 Subject: KVM: selftests: Zero-initialize entire test_result in memslot perf test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zero-initialize the entire test_result structure used by memslot_perf_test instead of zeroing only the fields used to guard the pr_info() calls. gcc 13.2.0 is a bit overzealous and incorrectly thinks that rbestslottime's slot_runtime may be used uninitialized. In file included from memslot_perf_test.c:25: memslot_perf_test.c: In function ‘main’: include/test_util.h:31:22: error: ‘rbestslottime.slot_runtime.tv_nsec’ may be used uninitialized [-Werror=maybe-uninitialized] 31 | #define pr_info(...) printf(__VA_ARGS__) | ^~~~~~~~~~~~~~~~~~~ memslot_perf_test.c:1127:17: note: in expansion of macro ‘pr_info’ 1127 | pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n", | ^~~~~~~ memslot_perf_test.c:1092:28: note: ‘rbestslottime.slot_runtime.tv_nsec’ was declared here 1092 | struct test_result rbestslottime; | ^~~~~~~~~~~~~ include/test_util.h:31:22: error: ‘rbestslottime.slot_runtime.tv_sec’ may be used uninitialized [-Werror=maybe-uninitialized] 31 | #define pr_info(...) printf(__VA_ARGS__) | ^~~~~~~~~~~~~~~~~~~ memslot_perf_test.c:1127:17: note: in expansion of macro ‘pr_info’ 1127 | pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n", | ^~~~~~~ memslot_perf_test.c:1092:28: note: ‘rbestslottime.slot_runtime.tv_sec’ was declared here 1092 | struct test_result rbestslottime; | ^~~~~~~~~~~~~ That can't actually happen, at least not without the "result" structure in test_loop() also being used uninitialized, which gcc doesn't complain about, as writes to rbestslottime are all-or-nothing, i.e. slottimens can't be non-zero without slot_runtime being written. if (!data->mem_size && (!rbestslottime->slottimens || result.slottimens < rbestslottime->slottimens)) *rbestslottime = result; Zero-initialize the structures to make gcc happy even though this is likely a compiler bug. The cost to do so is negligible, both in terms of code and runtime overhead. The only downside is that the compiler won't warn about legitimate usage of "uninitialized" data, e.g. the test could end up consuming zeros instead of useful data. However, given that the test is quite mature and unlikely to see substantial changes, the odds of introducing such bugs are relatively low, whereas being able to compile KVM selftests with -Werror detects issues on a regular basis. Reviewed-by: Maciej S. Szmigiero Reviewed-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20231005002954.2887098-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/memslot_perf_test.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 20eb2e730800..8698d1ab60d0 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -1033,9 +1033,8 @@ static bool test_loop(const struct test_data *data, struct test_result *rbestruntime) { uint64_t maxslots; - struct test_result result; + struct test_result result = {}; - result.nloops = 0; if (!test_execute(targs->nslots, &maxslots, targs->seconds, data, &result.nloops, &result.slot_runtime, &result.guest_runtime)) { @@ -1089,7 +1088,7 @@ int main(int argc, char *argv[]) .seconds = 5, .runs = 1, }; - struct test_result rbestslottime; + struct test_result rbestslottime = {}; int tctr; if (!check_memory_sizes()) @@ -1098,11 +1097,10 @@ int main(int argc, char *argv[]) if (!parse_args(argc, argv, &targs)) return -1; - rbestslottime.slottimens = 0; for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) { const struct test_data *data = &tests[tctr]; unsigned int runctr; - struct test_result rbestruntime; + struct test_result rbestruntime = {}; if (tctr > targs.tfirst) pr_info("\n"); @@ -1110,7 +1108,6 @@ int main(int argc, char *argv[]) pr_info("Testing %s performance with %i runs, %d seconds each\n", data->name, targs.runs, targs.seconds); - rbestruntime.runtimens = 0; for (runctr = 0; runctr < targs.runs; runctr++) if (!test_loop(data, &targs, &rbestslottime, &rbestruntime)) -- cgit v1.2.3 From 81a61051e0ce5fd7e09225c0d5985da08c7954a7 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 5 Oct 2023 10:56:42 +0300 Subject: serial: core: Fix checks for tx runtime PM state Maximilian reported that surface_serial_hub serdev tx does not work during system suspend. During system suspend, runtime PM gets disabled in __device_suspend_late(), and tx is unable to wake-up the serial core port device that we use to check if tx is safe to start. Johan summarized the regression noting that serdev tx no longer always works as earlier when the serdev device is runtime PM active. The serdev device and the serial core controller devices are siblings of the serial port hardware device. The runtime PM usage count from serdev device does not propagate to the serial core device siblings, it only propagates to the parent. In addition to the tx issue for suspend, testing for the serial core port device can cause an unnecessary delay in enabling tx while waiting for the serial core port device to wake-up. The serial core port device wake-up is only needed to flush pending tx when the serial port hardware device was in runtime PM suspended state. To fix the regression, we need to check the runtime PM state of the parent serial port hardware device for tx instead of the serial core port device. As the serial port device drivers may or may not implement runtime PM, we need to also add a check for pm_runtime_enabled(). Reported-by: Maximilian Luz Cc: stable Fixes: 84a9582fd203 ("serial: core: Start managing serial controllers to enable runtime PM") Signed-off-by: Tony Lindgren Tested-by: Maximilian Luz Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20231005075644.25936-1-tony@atomide.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index ca26a8aef2cb..d5ba6e90bd95 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -156,7 +156,7 @@ static void __uart_start(struct uart_state *state) * enabled, serial_port_runtime_resume() calls start_tx() again * after enabling the device. */ - if (pm_runtime_active(&port_dev->dev)) + if (!pm_runtime_enabled(port->dev) || pm_runtime_active(port->dev)) port->ops->start_tx(port); pm_runtime_mark_last_busy(&port_dev->dev); pm_runtime_put_autosuspend(&port_dev->dev); -- cgit v1.2.3 From b3fa3cf02e3ce92d32bfdeedd5a6bd0825f55a14 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Sat, 7 Oct 2023 23:38:18 +0200 Subject: ASoC: ti: ams-delta: Fix cx81801_receive() argument types Since types of arguments accepted by tty_ldis_ops::receive_buf() have changed, the driver no longer builds. .../linux/sound/soc/ti/ams-delta.c:403:24: error: initialization of 'void (*)(struct tty_struct *, const u8 *, const u8 *, size_t)' {aka 'void (*)(struct tty_struct *, const unsigned char *, const unsigned char *, unsigned int)'} from incompatible pointer type 'void (*)(struct tty_struct *, const u8 *, const char *, int)' {aka 'void (*)(struct tty_struct *, const unsigned char *, const char *, int)'} [-Werror=incompatible-pointer-types] 403 | .receive_buf = cx81801_receive, Fix it. Fixes: e8161447bb0c ("tty: make tty_ldisc_ops::*buf*() hooks operate on size_t") Fixes: 892bc209f250 ("tty: use u8 for flags") Signed-off-by: Janusz Krzysztofik Link: https://lore.kernel.org/r/20231007213820.376360-1-jmkrzyszt@gmail.com Signed-off-by: Greg Kroah-Hartman --- sound/soc/ti/ams-delta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/ti/ams-delta.c b/sound/soc/ti/ams-delta.c index 371943350fdf..666057d50ea0 100644 --- a/sound/soc/ti/ams-delta.c +++ b/sound/soc/ti/ams-delta.c @@ -336,8 +336,8 @@ static void cx81801_hangup(struct tty_struct *tty) } /* Line discipline .receive_buf() */ -static void cx81801_receive(struct tty_struct *tty, const u8 *cp, - const char *fp, int count) +static void cx81801_receive(struct tty_struct *tty, const u8 *cp, const u8 *fp, + size_t count) { struct snd_soc_component *component = tty->disc_data; const unsigned char *c; -- cgit v1.2.3 From 025d5ac978cc3b47874cc1c03ab096a78b49f278 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 6 Oct 2023 16:51:32 -0700 Subject: x86/resctrl: Fix kernel-doc warnings The kernel test robot reported kernel-doc warnings here: monitor.c:34: warning: Cannot understand * @rmid_free_lru A least recently used list of free RMIDs on line 34 - I thought it was a doc line monitor.c:41: warning: Cannot understand * @rmid_limbo_count count of currently unused but (potentially) on line 41 - I thought it was a doc line monitor.c:50: warning: Cannot understand * @rmid_entry - The entry in the limbo and free lists. on line 50 - I thought it was a doc line We don't have a syntax for documenting individual data items via kernel-doc, so remove the "/**" kernel-doc markers and add a hyphen for consistency. Fixes: 6a445edce657 ("x86/intel_rdt/cqm: Add RDT monitoring initialization") Fixes: 24247aeeabe9 ("x86/intel_rdt/cqm: Improve limbo list processing") Reported-by: kernel test robot Signed-off-by: Randy Dunlap Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231006235132.16227-1-rdunlap@infradead.org --- arch/x86/kernel/cpu/resctrl/monitor.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index ded1fc7cb7cb..f136ac046851 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -30,15 +30,15 @@ struct rmid_entry { struct list_head list; }; -/** - * @rmid_free_lru A least recently used list of free RMIDs +/* + * @rmid_free_lru - A least recently used list of free RMIDs * These RMIDs are guaranteed to have an occupancy less than the * threshold occupancy */ static LIST_HEAD(rmid_free_lru); -/** - * @rmid_limbo_count count of currently unused but (potentially) +/* + * @rmid_limbo_count - count of currently unused but (potentially) * dirty RMIDs. * This counts RMIDs that no one is currently using but that * may have a occupancy value > resctrl_rmid_realloc_threshold. User can @@ -46,7 +46,7 @@ static LIST_HEAD(rmid_free_lru); */ static unsigned int rmid_limbo_count; -/** +/* * @rmid_entry - The entry in the limbo and free lists. */ static struct rmid_entry *rmid_ptrs; -- cgit v1.2.3 From e53899771a02f798d436655efbd9d4b46c0f9265 Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Fri, 6 Oct 2023 11:57:26 -0700 Subject: perf/x86/lbr: Filter vsyscall addresses We found that a panic can occur when a vsyscall is made while LBR sampling is active. If the vsyscall is interrupted (NMI) for perf sampling, this call sequence can occur (most recent at top): __insn_get_emulate_prefix() insn_get_emulate_prefix() insn_get_prefixes() insn_get_opcode() decode_branch_type() get_branch_type() intel_pmu_lbr_filter() intel_pmu_handle_irq() perf_event_nmi_handler() Within __insn_get_emulate_prefix() at frame 0, a macro is called: peek_nbyte_next(insn_byte_t, insn, i) Within this macro, this dereference occurs: (insn)->next_byte Inspecting registers at this point, the value of the next_byte field is the address of the vsyscall made, for example the location of the vsyscall version of gettimeofday() at 0xffffffffff600000. The access to an address in the vsyscall region will trigger an oops due to an unhandled page fault. To fix the bug, filtering for vsyscalls can be done when determining the branch type. This patch will return a "none" branch if a kernel address if found to lie in the vsyscall region. Suggested-by: Alexei Starovoitov Signed-off-by: JP Kobryn Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org --- arch/x86/events/utils.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/utils.c b/arch/x86/events/utils.c index 76b1f8bb0fd5..dab4ed199227 100644 --- a/arch/x86/events/utils.c +++ b/arch/x86/events/utils.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include "perf_event.h" @@ -132,9 +133,9 @@ static int get_branch_type(unsigned long from, unsigned long to, int abort, * The LBR logs any address in the IP, even if the IP just * faulted. This means userspace can control the from address. * Ensure we don't blindly read any address by validating it is - * a known text address. + * a known text address and not a vsyscall address. */ - if (kernel_text_address(from)) { + if (kernel_text_address(from) && !in_gate_area_no_mm(from)) { addr = (void *)from; /* * Assume we can get the maximum possible size -- cgit v1.2.3 From 0618c077a8c20e8c81e367988f70f7e32bb5a717 Mon Sep 17 00:00:00 2001 From: Zhang Shurong Date: Thu, 5 Oct 2023 22:28:35 +0800 Subject: dmaengine: ste_dma40: Fix PM disable depth imbalance in d40_probe The pm_runtime_enable will increase power disable depth. Thus a pairing decrement is needed on the error handling path to keep it balanced according to context. We fix it by calling pm_runtime_disable when error returns. Signed-off-by: Zhang Shurong Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/tencent_DD2D371DB5925B4B602B1E1D0A5FA88F1208@qq.com Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 89e82508c133..002833fb1fa0 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -3668,6 +3668,7 @@ static int __init d40_probe(struct platform_device *pdev) regulator_disable(base->lcpa_regulator); regulator_put(base->lcpa_regulator); } + pm_runtime_disable(base->dev); report_failure: d40_err(dev, "probe failed\n"); -- cgit v1.2.3 From 81337b9a72dc58a5fa0ae8a042e8cb59f9bdec4a Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Wed, 4 Oct 2023 18:35:28 +0200 Subject: dmaengine: stm32-mdma: abort resume if no ongoing transfer chan->desc can be null, if transfer is terminated when resume is called, leading to a NULL pointer when retrieving the hwdesc. To avoid this case, check that chan->desc is not null and channel is disabled (transfer previously paused or terminated). Fixes: a4ffb13c8946 ("dmaengine: Add STM32 MDMA driver") Signed-off-by: Amelie Delaunay Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231004163531.2864160-1-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-mdma.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index 0de234022c6d..cc6f4b00091f 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -1236,6 +1236,10 @@ static int stm32_mdma_resume(struct dma_chan *c) unsigned long flags; u32 status, reg; + /* Transfer can be terminated */ + if (!chan->desc || (stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)) & STM32_MDMA_CCR_EN)) + return -EPERM; + hwdesc = chan->desc->node[chan->curr_hwdesc].hwdesc; spin_lock_irqsave(&chan->vchan.lock, flags); -- cgit v1.2.3 From a4b306eb83579c07b63dc65cd5bae53b7b4019d0 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Wed, 4 Oct 2023 18:35:29 +0200 Subject: dmaengine: stm32-mdma: use Link Address Register to compute residue Current implementation relies on curr_hwdesc index. But to keep this index up to date, Block Transfer interrupt (BTIE) has to be enabled. If it is not, curr_hwdesc is not updated, and then residue is not reliable. Rely on Link Address Register instead. And disable BTIE interrupt in stm32_mdma_setup_xfer() because it is no more needed in case of _prep_slave_sg() to maintain curr_hwdesc up to date. It avoids extra interrupts and also ensures a reliable residue. These improvements are required for STM32 DCMI camera capture use case, which need STM32 DMA and MDMA chaining for good performance. Fixes: 696874322771 ("dmaengine: stm32-mdma: add support to be triggered by STM32 DMA") Signed-off-by: Amelie Delaunay Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231004163531.2864160-2-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-mdma.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index cc6f4b00091f..da73e13b8c9d 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -777,8 +777,6 @@ static int stm32_mdma_setup_xfer(struct stm32_mdma_chan *chan, /* Enable interrupts */ ccr &= ~STM32_MDMA_CCR_IRQ_MASK; ccr |= STM32_MDMA_CCR_TEIE | STM32_MDMA_CCR_CTCIE; - if (sg_len > 1) - ccr |= STM32_MDMA_CCR_BTIE; desc->ccr = ccr; return 0; @@ -1324,12 +1322,21 @@ static size_t stm32_mdma_desc_residue(struct stm32_mdma_chan *chan, { struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); struct stm32_mdma_hwdesc *hwdesc; - u32 cbndtr, residue, modulo, burst_size; + u32 cisr, clar, cbndtr, residue, modulo, burst_size; int i; + cisr = stm32_mdma_read(dmadev, STM32_MDMA_CISR(chan->id)); + residue = 0; - for (i = curr_hwdesc + 1; i < desc->count; i++) { + /* Get the next hw descriptor to process from current transfer */ + clar = stm32_mdma_read(dmadev, STM32_MDMA_CLAR(chan->id)); + for (i = desc->count - 1; i >= 0; i--) { hwdesc = desc->node[i].hwdesc; + + if (hwdesc->clar == clar) + break;/* Current transfer found, stop cumulating */ + + /* Cumulate residue of unprocessed hw descriptors */ residue += STM32_MDMA_CBNDTR_BNDT(hwdesc->cbndtr); } cbndtr = stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id)); -- cgit v1.2.3 From 584970421725b7805db84714b857851fdf7203a9 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Wed, 4 Oct 2023 18:35:30 +0200 Subject: dmaengine: stm32-mdma: set in_flight_bytes in case CRQA flag is set CRQA flag is set by hardware when the channel request become active and the channel is enabled. It is cleared by hardware, when the channel request is completed. So when it is set, it means MDMA is transferring bytes. This information is useful in case of STM32 DMA and MDMA chaining, especially when the user pauses DMA before stopping it, to trig one last MDMA transfer to get the latest bytes of the SRAM buffer to the destination buffer. STM32 DCMI driver can then use this to know if the last MDMA transfer in case of chaining is done. Fixes: 696874322771 ("dmaengine: stm32-mdma: add support to be triggered by STM32 DMA") Signed-off-by: Amelie Delaunay Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231004163531.2864160-3-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-mdma.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index da73e13b8c9d..bae08b3f55c7 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -1318,7 +1318,8 @@ static int stm32_mdma_slave_config(struct dma_chan *c, static size_t stm32_mdma_desc_residue(struct stm32_mdma_chan *chan, struct stm32_mdma_desc *desc, - u32 curr_hwdesc) + u32 curr_hwdesc, + struct dma_tx_state *state) { struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); struct stm32_mdma_hwdesc *hwdesc; @@ -1342,6 +1343,10 @@ static size_t stm32_mdma_desc_residue(struct stm32_mdma_chan *chan, cbndtr = stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id)); residue += cbndtr & STM32_MDMA_CBNDTR_BNDT_MASK; + state->in_flight_bytes = 0; + if (chan->chan_config.m2m_hw && (cisr & STM32_MDMA_CISR_CRQA)) + state->in_flight_bytes = cbndtr & STM32_MDMA_CBNDTR_BNDT_MASK; + if (!chan->mem_burst) return residue; @@ -1371,11 +1376,10 @@ static enum dma_status stm32_mdma_tx_status(struct dma_chan *c, vdesc = vchan_find_desc(&chan->vchan, cookie); if (chan->desc && cookie == chan->desc->vdesc.tx.cookie) - residue = stm32_mdma_desc_residue(chan, chan->desc, - chan->curr_hwdesc); + residue = stm32_mdma_desc_residue(chan, chan->desc, chan->curr_hwdesc, state); else if (vdesc) - residue = stm32_mdma_desc_residue(chan, - to_stm32_mdma_desc(vdesc), 0); + residue = stm32_mdma_desc_residue(chan, to_stm32_mdma_desc(vdesc), 0, state); + dma_set_residue(state, residue); spin_unlock_irqrestore(&chan->vchan.lock, flags); -- cgit v1.2.3 From 2df467e908ce463cff1431ca1b00f650f7a514b4 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Wed, 4 Oct 2023 17:50:23 +0200 Subject: dmaengine: stm32-dma: fix stm32_dma_prep_slave_sg in case of MDMA chaining Current Target (CT) have to be reset when starting an MDMA chaining use case, as Double Buffer mode is activated. It ensures the DMA will start processing the first memory target (pointed with SxM0AR). Fixes: 723795173ce1 ("dmaengine: stm32-dma: add support to trigger STM32 MDMA") Signed-off-by: Amelie Delaunay Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231004155024.2609531-1-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-dma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index 5c36811aa134..7427acc82259 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -1113,8 +1113,10 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg( chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL; /* Activate Double Buffer Mode if DMA triggers STM32 MDMA and more than 1 sg */ - if (chan->trig_mdma && sg_len > 1) + if (chan->trig_mdma && sg_len > 1) { chan->chan_reg.dma_scr |= STM32_DMA_SCR_DBM; + chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_CT; + } for_each_sg(sgl, sg, sg_len, i) { ret = stm32_dma_set_xfer_param(chan, direction, &buswidth, -- cgit v1.2.3 From 67e13e89742c3b21ce177f612bf9ef32caae6047 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Wed, 4 Oct 2023 17:50:24 +0200 Subject: dmaengine: stm32-dma: fix residue in case of MDMA chaining In case of MDMA chaining, DMA is configured in Double-Buffer Mode (DBM) with two periods, but if transfer has been prepared with _prep_slave_sg(), the transfer is not marked cyclic (=!chan->desc->cyclic). However, as DBM is activated for MDMA chaining, residue computation must take into account cyclic constraints. With only two periods in MDMA chaining, and no update due to Transfer Complete interrupt masked, n_sg is always 0. If DMA current memory address (depending on SxCR.CT and SxM0AR/SxM1AR) does not correspond, it means n_sg should be increased. Then, the residue of the current period is the one read from SxNDTR and should not be overwritten with the full period length. Fixes: 723795173ce1 ("dmaengine: stm32-dma: add support to trigger STM32 MDMA") Signed-off-by: Amelie Delaunay Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231004155024.2609531-2-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-dma.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index 7427acc82259..0b30151fb45c 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -1389,11 +1389,12 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan, residue = stm32_dma_get_remaining_bytes(chan); - if (chan->desc->cyclic && !stm32_dma_is_current_sg(chan)) { + if ((chan->desc->cyclic || chan->trig_mdma) && !stm32_dma_is_current_sg(chan)) { n_sg++; if (n_sg == chan->desc->num_sgs) n_sg = 0; - residue = sg_req->len; + if (!chan->trig_mdma) + residue = sg_req->len; } /* @@ -1403,7 +1404,7 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan, * residue = remaining bytes from NDTR + remaining * periods/sg to be transferred */ - if (!chan->desc->cyclic || n_sg != 0) + if ((!chan->desc->cyclic && !chan->trig_mdma) || n_sg != 0) for (i = n_sg; i < desc->num_sgs; i++) residue += desc->sg_req[i].len; -- cgit v1.2.3 From 3fa53518ad419bfacceae046a9d8027e4c4c5290 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 4 Oct 2023 10:29:11 -0400 Subject: dmaengine: fsl-edma: fix all channels requested when call fsl_edma3_xlate() dma_get_slave_channel() increases client_count for all channels. It should only be called when a matched channel is found in fsl_edma3_xlate(). Move dma_get_slave_channel() after checking for a matched channel. Cc: stable@vger.kernel.org Fixes: 72f5801a4e2b ("dmaengine: fsl-edma: integrate v3 support") Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20231004142911.838916-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 621a460fae0c..8c4ed7012e23 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -154,18 +154,20 @@ static struct dma_chan *fsl_edma3_xlate(struct of_phandle_args *dma_spec, fsl_chan = to_fsl_edma_chan(chan); i = fsl_chan - fsl_edma->chans; - chan = dma_get_slave_channel(chan); - chan->device->privatecnt++; fsl_chan->priority = dma_spec->args[1]; fsl_chan->is_rxchan = dma_spec->args[2] & ARGS_RX; fsl_chan->is_remote = dma_spec->args[2] & ARGS_REMOTE; fsl_chan->is_multi_fifo = dma_spec->args[2] & ARGS_MULTI_FIFO; if (!b_chmux && i == dma_spec->args[0]) { + chan = dma_get_slave_channel(chan); + chan->device->privatecnt++; mutex_unlock(&fsl_edma->fsl_edma_mutex); return chan; } else if (b_chmux && !fsl_chan->srcid) { /* if controller support channel mux, choose a free channel */ + chan = dma_get_slave_channel(chan); + chan->device->privatecnt++; fsl_chan->srcid = dma_spec->args[0]; mutex_unlock(&fsl_edma->fsl_edma_mutex); return chan; -- cgit v1.2.3 From 8dafa9d0eb1a1550a0f4d462db9354161bc51e0c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 6 Oct 2023 21:24:45 +0200 Subject: sched/eevdf: Fix min_deadline heap integrity Marek and Biju reported instances of: "EEVDF scheduling fail, picking leftmost" which Mike correlated with cgroup scheduling and the min_deadline heap getting corrupted; some trace output confirms: > And yeah, min_deadline is hosed somehow: > > validate_cfs_rq: --- / > __print_se: ffff88845cf48080 w: 1024 ve: -58857638 lag: 870381 vd: -55861854 vmd: -66302085 E (11372/tr) > __print_se: ffff88810d165800 w: 25 ve: -80323686 lag: 22336429 vd: -41496434 vmd: -66302085 E (-1//autogroup-31) > __print_se: ffff888108379000 w: 25 ve: 0 lag: -57987257 vd: 114632828 vmd: 114632828 N (-1//autogroup-33) > validate_cfs_rq: min_deadline: -55861854 avg_vruntime: -62278313462 / 1074 = -57987256 Turns out that reweight_entity(), which tries really hard to be fast, does not do the normal dequeue+update+enqueue pattern but *does* scale the deadline. However, it then fails to propagate the updated deadline value up the heap. Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy") Reported-by: Marek Szyprowski Reported-by: Biju Das Reported-by: Mike Galbraith Signed-off-by: Peter Zijlstra (Intel) Tested-by: Marek Szyprowski Tested-by: Biju Das Tested-by: Mike Galbraith Link: https://lkml.kernel.org/r/20231006192445.GE743@noisy.programming.kicks-ass.net --- kernel/sched/fair.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ef7490c4b8b4..a4b904a010c6 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3613,6 +3613,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, */ deadline = div_s64(deadline * old_weight, weight); se->deadline = se->vruntime + deadline; + min_deadline_cb_propagate(&se->run_node, NULL); } #ifdef CONFIG_SMP -- cgit v1.2.3 From b01db23d5923a35023540edc4f0c5f019e11ac7d Mon Sep 17 00:00:00 2001 From: Benjamin Segall Date: Fri, 29 Sep 2023 17:09:30 -0700 Subject: sched/eevdf: Fix pick_eevdf() The old pick_eevdf() could fail to find the actual earliest eligible deadline when it descended to the right looking for min_deadline, but it turned out that that min_deadline wasn't actually eligible. In that case we need to go back and search through any left branches we skipped looking for the actual best _eligible_ min_deadline. This is more expensive, but still O(log n), and at worst should only involve descending two branches of the rbtree. I've run this through a userspace stress test (thank you tools/lib/rbtree.c), so hopefully this implementation doesn't miss any corner cases. Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy") Signed-off-by: Ben Segall Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/xm261qego72d.fsf_-_@google.com --- kernel/sched/fair.c | 72 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 58 insertions(+), 14 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a4b904a010c6..061a30a8925a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -872,14 +872,16 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) * * Which allows an EDF like search on (sub)trees. */ -static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) +static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq) { struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node; struct sched_entity *curr = cfs_rq->curr; struct sched_entity *best = NULL; + struct sched_entity *best_left = NULL; if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr))) curr = NULL; + best = curr; /* * Once selected, run a task until it either becomes non-eligible or @@ -900,33 +902,75 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) } /* - * If this entity has an earlier deadline than the previous - * best, take this one. If it also has the earliest deadline - * of its subtree, we're done. + * Now we heap search eligible trees for the best (min_)deadline */ - if (!best || deadline_gt(deadline, best, se)) { + if (!best || deadline_gt(deadline, best, se)) best = se; - if (best->deadline == best->min_deadline) - break; - } /* - * If the earlest deadline in this subtree is in the fully - * eligible left half of our space, go there. + * Every se in a left branch is eligible, keep track of the + * branch with the best min_deadline */ + if (node->rb_left) { + struct sched_entity *left = __node_2_se(node->rb_left); + + if (!best_left || deadline_gt(min_deadline, best_left, left)) + best_left = left; + + /* + * min_deadline is in the left branch. rb_left and all + * descendants are eligible, so immediately switch to the second + * loop. + */ + if (left->min_deadline == se->min_deadline) + break; + } + + /* min_deadline is at this node, no need to look right */ + if (se->deadline == se->min_deadline) + break; + + /* else min_deadline is in the right branch. */ + node = node->rb_right; + } + + /* + * We ran into an eligible node which is itself the best. + * (Or nr_running == 0 and both are NULL) + */ + if (!best_left || (s64)(best_left->min_deadline - best->deadline) > 0) + return best; + + /* + * Now best_left and all of its children are eligible, and we are just + * looking for deadline == min_deadline + */ + node = &best_left->run_node; + while (node) { + struct sched_entity *se = __node_2_se(node); + + /* min_deadline is the current node */ + if (se->deadline == se->min_deadline) + return se; + + /* min_deadline is in the left branch */ if (node->rb_left && __node_2_se(node->rb_left)->min_deadline == se->min_deadline) { node = node->rb_left; continue; } + /* else min_deadline is in the right branch */ node = node->rb_right; } + return NULL; +} - if (!best || (curr && deadline_gt(deadline, best, curr))) - best = curr; +static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) +{ + struct sched_entity *se = __pick_eevdf(cfs_rq); - if (unlikely(!best)) { + if (!se) { struct sched_entity *left = __pick_first_entity(cfs_rq); if (left) { pr_err("EEVDF scheduling fail, picking leftmost\n"); @@ -934,7 +978,7 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) } } - return best; + return se; } #ifdef CONFIG_SCHED_DEBUG -- cgit v1.2.3 From 15c0a870dc44ed14e01efbdd319d232234ee639f Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 6 Sep 2023 14:22:07 +0800 Subject: ceph: fix incorrect revoked caps assert in ceph_fill_file_size() When truncating the inode the MDS will acquire the xlock for the ifile Locker, which will revoke the 'Frwsxl' caps from the clients. But when the client just releases and flushes the 'Fw' caps to MDS, for exmaple, and once the MDS receives the caps flushing msg it just thought the revocation has finished. Then the MDS will continue truncating the inode and then issued the truncate notification to all the clients. While just before the clients receives the cap flushing ack they receive the truncation notification, the clients will detecte that the 'issued | dirty' is still holding the 'Fw' caps. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/56693 Fixes: b0d7c2231015 ("ceph: introduce i_truncate_mutex") Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 800ab7920513..b79100f720b3 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -769,9 +769,7 @@ int ceph_fill_file_size(struct inode *inode, int issued, ci->i_truncate_seq = truncate_seq; /* the MDS should have revoked these caps */ - WARN_ON_ONCE(issued & (CEPH_CAP_FILE_EXCL | - CEPH_CAP_FILE_RD | - CEPH_CAP_FILE_WR | + WARN_ON_ONCE(issued & (CEPH_CAP_FILE_RD | CEPH_CAP_FILE_LAZYIO)); /* * If we hold relevant caps, or in the case where we're -- cgit v1.2.3 From 42b71826fe5d01f3e6cdddc91f81d0e4afb91801 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 29 Sep 2023 10:12:06 +0100 Subject: ceph: remove unnecessary IS_ERR() check in ceph_fname_to_usr() Before returning, function ceph_fname_to_usr() does a final IS_ERR() check in 'dir': if ((dir != fname->dir) && !IS_ERR(dir)) {...} This check is unnecessary because, if the 'dir' variable has changed to something other than 'fname->dir' (it's initial value), that error check has been performed already and, if there was indeed an error, it would have been returned immediately. Besides, this useless IS_ERR() is also confusing static analysis tools. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202309282202.xZxGdvS3-lkp@intel.com/ Signed-off-by: Luis Henriques Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c index e1f31b86fd48..5b5112c78462 100644 --- a/fs/ceph/crypto.c +++ b/fs/ceph/crypto.c @@ -460,7 +460,7 @@ int ceph_fname_to_usr(const struct ceph_fname *fname, struct fscrypt_str *tname, out: fscrypt_fname_free_buffer(&_tname); out_inode: - if ((dir != fname->dir) && !IS_ERR(dir)) { + if (dir != fname->dir) { if ((dir->i_state & I_NEW)) discard_new_inode(dir); else -- cgit v1.2.3 From 7563cf17dce0a875ba3d872acdc63a78ea344019 Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Wed, 4 Oct 2023 18:38:27 -0500 Subject: libceph: use kernel_connect() Direct calls to ops->connect() can overwrite the address parameter when used in conjunction with BPF SOCK_ADDR hooks. Recent changes to kernel_connect() ensure that callers are insulated from such side effects. This patch wraps the direct call to ops->connect() with kernel_connect() to prevent unexpected changes to the address passed to ceph_tcp_connect(). This change was originally part of a larger patch targeting the net tree addressing all instances of unprotected calls to ops->connect() throughout the kernel, but this change was split up into several patches targeting various trees. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/netdev/20230821100007.559638-1-jrife@google.com/ Link: https://lore.kernel.org/netdev/9944248dba1bce861375fcce9de663934d933ba9.camel@redhat.com/ Fixes: d74bad4e74ee ("bpf: Hooks for sys_connect") Signed-off-by: Jordan Rife Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- net/ceph/messenger.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 10a41cd9c523..3c8b78d9c4d1 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -459,8 +459,8 @@ int ceph_tcp_connect(struct ceph_connection *con) set_sock_callbacks(sock, con); con_sock_state_connecting(con); - ret = sock->ops->connect(sock, (struct sockaddr *)&ss, sizeof(ss), - O_NONBLOCK); + ret = kernel_connect(sock, (struct sockaddr *)&ss, sizeof(ss), + O_NONBLOCK); if (ret == -EINPROGRESS) { dout("connect %s EINPROGRESS sk_state = %u\n", ceph_pr_addr(&con->peer_addr), -- cgit v1.2.3 From 07bb00ef00ace88dd6f695fadbba76565756e55c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 7 Oct 2023 11:52:39 +0300 Subject: ceph: fix type promotion bug on 32bit systems In this code "ret" is type long and "src_objlen" is unsigned int. The problem is that on 32bit systems, when we do the comparison signed longs are type promoted to unsigned int. So negative error codes from do_splice_direct() are treated as success instead of failure. Cc: stable@vger.kernel.org Fixes: 1b0c3b9f91f0 ("ceph: re-org copy_file_range and fix some error paths") Signed-off-by: Dan Carpenter Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b1da02f5dbe3..b5f8038065d7 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -2969,7 +2969,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, ret = do_splice_direct(src_file, &src_off, dst_file, &dst_off, src_objlen, flags); /* Abort on short copies or on error */ - if (ret < src_objlen) { + if (ret < (long)src_objlen) { dout("Failed partial copy (%zd)\n", ret); goto out; } -- cgit v1.2.3 From 5d9cea8a552ee122e21fbd5a3c5d4eb85f648e06 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:15 +0200 Subject: powerpc/8xx: Fix pte_access_permitted() for PAGE_NONE On 8xx, PAGE_NONE is handled by setting _PAGE_NA instead of clearing _PAGE_USER. But then pte_user() returns 1 also for PAGE_NONE. As _PAGE_NA prevent reads, add a specific version of pte_read() that returns 0 when _PAGE_NA is set instead of always returning 1. Fixes: 351750331fc1 ("powerpc/mm: Introduce _PAGE_NA") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/57bcfbe578e43123f9ed73e040229b80f1ad56ec.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pte-8xx.h | 7 +++++++ arch/powerpc/include/asm/nohash/pgtable.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 21f681ee535a..e6fe1d5731f2 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -94,6 +94,13 @@ static inline pte_t pte_wrprotect(pte_t pte) #define pte_wrprotect pte_wrprotect +static inline int pte_read(pte_t pte) +{ + return (pte_val(pte) & _PAGE_RO) != _PAGE_NA; +} + +#define pte_read pte_read + static inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_RO); diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 56ea48276356..c721478c5934 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -25,7 +25,9 @@ static inline int pte_write(pte_t pte) return pte_val(pte) & _PAGE_RW; } #endif +#ifndef pte_read static inline int pte_read(pte_t pte) { return 1; } +#endif static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } -- cgit v1.2.3 From 5ea0bbaa32e8f54e9a57cfee4a3b8769b80be0d2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:16 +0200 Subject: powerpc/64e: Fix wrong test in __ptep_test_and_clear_young() Commit 45201c879469 ("powerpc/nohash: Remove hash related code from nohash headers.") replaced: if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) return 0; By: if (pte_young(*ptep)) return 0; But it should be: if (!pte_young(*ptep)) return 0; Fix it. Fixes: 45201c879469 ("powerpc/nohash: Remove hash related code from nohash headers.") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/8bb7f06494e21adada724ede47a4c3d97e879d40.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/64/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 5cd9acf58a7d..eb6891e34cbd 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -197,7 +197,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, { unsigned long old; - if (pte_young(*ptep)) + if (!pte_young(*ptep)) return 0; old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); return (old & _PAGE_ACCESSED) != 0; -- cgit v1.2.3 From c6df843348d6b71ea986266c12831cb60c2cf325 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Oct 2023 10:21:04 +0200 Subject: regmap: fix NULL deref on lookup Not all regmaps have a name so make sure to check for that to avoid dereferencing a NULL pointer when dev_get_regmap() is used to lookup a named regmap. Fixes: e84861fec32d ("regmap: dev_get_regmap_match(): fix string comparison") Cc: stable@vger.kernel.org # 5.8 Cc: Marc Kleine-Budde Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231006082104.16707-1-johan+linaro@kernel.org Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 884cb51c8f67..234a84ecde8b 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1478,7 +1478,7 @@ static int dev_get_regmap_match(struct device *dev, void *res, void *data) /* If the user didn't specify a name match any */ if (data) - return !strcmp((*r)->name, data); + return (*r)->name && !strcmp((*r)->name, data); else return 1; } -- cgit v1.2.3 From f9b3ea02555e67e2e7bf95219953b88d122bd275 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 9 Oct 2023 14:11:01 +0200 Subject: ACPI: resource: Add TongFang GM6BGEQ, GM6BG5Q and GM6BG0Q to irq1_edge_low_force_override[] The TongFang GM6BGEQ, GM6BG5Q and GM6BG0Q are 3 GPU variants of a TongFang barebone design which is sold under various brand names. The ACPI IRQ override for the keyboard IRQ must be used on these AMD Zen laptops in order for the IRQ to work. Adjust the pcspecialist_laptop[] DMI match table for this: 1. Drop the sys-vendor match from the existing PCSpecialist Elimina Pro 16 entry for the GM6BGEQ (RTX3050 GPU) model so that it will also match the laptop when sold by other vendors such as hyperbook.pl. 2. Add board-name matches for the GM6BG5Q (RTX4050) and GM6B0Q (RTX4060) models. Note the .ident values of the dmi_system_id structs are left unset since these are not used. Suggested-by: August Wikerfors Reported-by: Francesco Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217394 Link: https://laptopparts4less.frl/index.php?route=product/search&filter_name=GM6BG Link: https://hyperbook.pl/en/content/14-hyperbook-drivers Link: https://linux-hardware.org/?probe=bfa70344e3 Link: https://bbs.archlinuxcn.org/viewtopic.php?id=13313 Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 8116b55b6c98..297a88587031 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -507,16 +507,23 @@ static const struct dmi_system_id maingear_laptop[] = { static const struct dmi_system_id pcspecialist_laptop[] = { { - .ident = "PCSpecialist Elimina Pro 16 M", - /* - * Some models have product-name "Elimina Pro 16 M", - * others "GM6BGEQ". Match on board-name to match both. - */ + /* TongFang GM6BGEQ / PCSpecialist Elimina Pro 16 M, RTX 3050 */ .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "PCSpecialist"), DMI_MATCH(DMI_BOARD_NAME, "GM6BGEQ"), }, }, + { + /* TongFang GM6BG5Q, RTX 4050 */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GM6BG5Q"), + }, + }, + { + /* TongFang GM6BG0Q / PCSpecialist Elimina Pro 16 M, RTX 4060 */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GM6BG0Q"), + }, + }, { } }; -- cgit v1.2.3 From 1ca0b605150501b7dc59f3016271da4eb3e96fce Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Mon, 9 Oct 2023 15:58:11 +0200 Subject: cgroup: Remove duplicates in cgroup v1 tasks file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One PID may appear multiple times in a preloaded pidlist. (Possibly due to PID recycling but we have reports of the same task_struct appearing with different PIDs, thus possibly involving transfer of PID via de_thread().) Because v1 seq_file iterator uses PIDs as position, it leads to a message: > seq_file: buggy .next function kernfs_seq_next did not update position index Conservative and quick fix consists of removing duplicates from `tasks` file (as opposed to removing pidlists altogether). It doesn't affect correctness (it's sufficient to show a PID once), performance impact would be hidden by unconditional sorting of the pidlist already in place (asymptotically). Link: https://lore.kernel.org/r/20230823174804.23632-1-mkoutny@suse.com/ Suggested-by: Firo Yang Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- kernel/cgroup/cgroup-v1.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index c487ffef6652..76db6c67e39a 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -360,10 +360,9 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, } css_task_iter_end(&it); length = n; - /* now sort & (if procs) strip out duplicates */ + /* now sort & strip out duplicates (tgids or recycled thread PIDs) */ sort(array, length, sizeof(pid_t), cmppid, NULL); - if (type == CGROUP_FILE_PROCS) - length = pidlist_uniq(array, length); + length = pidlist_uniq(array, length); l = cgroup_pidlist_find_create(cgrp, type); if (!l) { -- cgit v1.2.3 From 39465cac283702a7d4a507a558db81898029c6d3 Mon Sep 17 00:00:00 2001 From: Konstantin Meskhidze Date: Tue, 5 Sep 2023 18:02:03 +0800 Subject: drm/vmwgfx: fix typo of sizeof argument Since size of 'header' pointer and '*header' structure is equal on 64-bit machines issue probably didn't cause any wrong behavior. But anyway, fixing typo is required. Fixes: 7a73ba7469cb ("drm/vmwgfx: Use TTM handles instead of SIDs as user-space surface handles.") Co-developed-by: Ivanov Mikhail Signed-off-by: Konstantin Meskhidze Reviewed-by: Zack Rusin Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20230905100203.1716731-1-konstantin.meskhidze@huawei.com --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 98e0723ca6f5..cc3f301ca163 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1619,7 +1619,7 @@ static int vmw_cmd_tex_state(struct vmw_private *dev_priv, { VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdSetTextureState); SVGA3dTextureState *last_state = (SVGA3dTextureState *) - ((unsigned long) header + header->size + sizeof(header)); + ((unsigned long) header + header->size + sizeof(*header)); SVGA3dTextureState *cur_state = (SVGA3dTextureState *) ((unsigned long) header + sizeof(*cmd)); struct vmw_resource *ctx; -- cgit v1.2.3 From 91398b413d03660fd5828f7b4abc64e884b98069 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 28 Sep 2023 00:13:55 -0400 Subject: drm/vmwgfx: Keep a gem reference to user bos in surfaces Surfaces can be backed (i.e. stored in) memory objects (mob's) which are created and managed by the userspace as GEM buffers. Surfaces grab only a ttm reference which means that the gem object can be deleted underneath us, especially in cases where prime buffer export is used. Make sure that all userspace surfaces which are backed by gem objects hold a gem reference to make sure they're not deleted before vmw surfaces are done with them, which fixes: ------------[ cut here ]------------ refcount_t: underflow; use-after-free. WARNING: CPU: 2 PID: 2632 at lib/refcount.c:28 refcount_warn_saturate+0xfb/0x150 Modules linked in: overlay vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vsock snd_ens1371 snd_ac97_codec ac97_bus snd_pcm gameport> CPU: 2 PID: 2632 Comm: vmw_ref_count Not tainted 6.5.0-rc2-vmwgfx #1 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 RIP: 0010:refcount_warn_saturate+0xfb/0x150 Code: eb 9e 0f b6 1d 8b 5b a6 01 80 fb 01 0f 87 ba e4 80 00 83 e3 01 75 89 48 c7 c7 c0 3c f9 a3 c6 05 6f 5b a6 01 01 e8 15 81 98 ff <0f> 0b e9 6f ff ff ff 0f b> RSP: 0018:ffffbdc34344bba0 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000027 RDX: ffff960475ea1548 RSI: 0000000000000001 RDI: ffff960475ea1540 RBP: ffffbdc34344bba8 R08: 0000000000000003 R09: 65646e75203a745f R10: ffffffffa5b32b20 R11: 72657466612d6573 R12: ffff96037d6a6400 R13: ffff9603484805b0 R14: 000000000000000b R15: ffff9603bed06060 FS: 00007f5fd8520c40(0000) GS:ffff960475e80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f5fda755000 CR3: 000000010d012005 CR4: 00000000003706e0 Call Trace: ? show_regs+0x6e/0x80 ? refcount_warn_saturate+0xfb/0x150 ? __warn+0x91/0x150 ? refcount_warn_saturate+0xfb/0x150 ? report_bug+0x19d/0x1b0 ? handle_bug+0x46/0x80 ? exc_invalid_op+0x1d/0x80 ? asm_exc_invalid_op+0x1f/0x30 ? refcount_warn_saturate+0xfb/0x150 drm_gem_object_handle_put_unlocked+0xba/0x110 [drm] drm_gem_object_release_handle+0x6e/0x80 [drm] drm_gem_handle_delete+0x6a/0xc0 [drm] ? __pfx_vmw_bo_unref_ioctl+0x10/0x10 [vmwgfx] vmw_bo_unref_ioctl+0x33/0x40 [vmwgfx] drm_ioctl_kernel+0xbc/0x160 [drm] drm_ioctl+0x2d2/0x580 [drm] ? __pfx_vmw_bo_unref_ioctl+0x10/0x10 [vmwgfx] ? do_vmi_munmap+0xee/0x180 vmw_generic_ioctl+0xbd/0x180 [vmwgfx] vmw_unlocked_ioctl+0x19/0x20 [vmwgfx] __x64_sys_ioctl+0x99/0xd0 do_syscall_64+0x5d/0x90 ? syscall_exit_to_user_mode+0x2a/0x50 ? do_syscall_64+0x6d/0x90 ? handle_mm_fault+0x16e/0x2f0 ? exit_to_user_mode_prepare+0x34/0x170 ? irqentry_exit_to_user_mode+0xd/0x20 ? irqentry_exit+0x3f/0x50 ? exc_page_fault+0x8e/0x190 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 RIP: 0033:0x7f5fda51aaff Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <41> 89 c0 3d 00 f0 ff ff 7> RSP: 002b:00007ffd536a4d30 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007ffd536a4de0 RCX: 00007f5fda51aaff RDX: 00007ffd536a4de0 RSI: 0000000040086442 RDI: 0000000000000003 RBP: 0000000040086442 R08: 000055fa603ada50 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000246 R12: 00007ffd536a51b8 R13: 0000000000000003 R14: 000055fa5ebb4c80 R15: 00007f5fda90f040 ---[ end trace 0000000000000000 ]--- A lot of the analyis on the bug was done by Murray McAllister and Ian Forbes. Reported-by: Murray McAllister Cc: Ian Forbes Signed-off-by: Zack Rusin Fixes: a950b989ea29 ("drm/vmwgfx: Do not drop the reference to the handle too soon") Cc: # v6.2+ Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20230928041355.737635-1-zack@kde.org --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 7 ++++--- drivers/gpu/drm/vmwgfx/vmwgfx_bo.h | 17 ++++++++++++----- drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c | 6 +++--- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 ++++ drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 10 ++++++---- drivers/gpu/drm/vmwgfx/vmwgfx_gem.c | 18 +++++++++++++++--- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 6 +++--- drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 12 ++++++------ drivers/gpu/drm/vmwgfx/vmwgfx_shader.c | 4 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 31 ++++++++++++------------------- 11 files changed, 68 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index c43853597776..2bfac3aad7b7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -34,6 +34,8 @@ static void vmw_bo_release(struct vmw_bo *vbo) { + WARN_ON(vbo->tbo.base.funcs && + kref_read(&vbo->tbo.base.refcount) != 0); vmw_bo_unmap(vbo); drm_gem_object_release(&vbo->tbo.base); } @@ -497,7 +499,7 @@ static int vmw_user_bo_synccpu_release(struct drm_file *filp, if (!(flags & drm_vmw_synccpu_allow_cs)) { atomic_dec(&vmw_bo->cpu_writers); } - vmw_user_bo_unref(vmw_bo); + vmw_user_bo_unref(&vmw_bo); } return ret; @@ -539,7 +541,7 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data, return ret; ret = vmw_user_bo_synccpu_grab(vbo, arg->flags); - vmw_user_bo_unref(vbo); + vmw_user_bo_unref(&vbo); if (unlikely(ret != 0)) { if (ret == -ERESTARTSYS || ret == -EBUSY) return -EBUSY; @@ -612,7 +614,6 @@ int vmw_user_bo_lookup(struct drm_file *filp, } *out = to_vmw_bo(gobj); - ttm_bo_get(&(*out)->tbo); return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index 1d433fceed3d..0d496dc9c6af 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -195,12 +195,19 @@ static inline struct vmw_bo *vmw_bo_reference(struct vmw_bo *buf) return buf; } -static inline void vmw_user_bo_unref(struct vmw_bo *vbo) +static inline struct vmw_bo *vmw_user_bo_ref(struct vmw_bo *vbo) { - if (vbo) { - ttm_bo_put(&vbo->tbo); - drm_gem_object_put(&vbo->tbo.base); - } + drm_gem_object_get(&vbo->tbo.base); + return vbo; +} + +static inline void vmw_user_bo_unref(struct vmw_bo **buf) +{ + struct vmw_bo *tmp_buf = *buf; + + *buf = NULL; + if (tmp_buf) + drm_gem_object_put(&tmp_buf->tbo.base); } static inline struct vmw_bo *to_vmw_bo(struct drm_gem_object *gobj) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c index c0b24d1cacbf..a7c07692262b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c @@ -432,7 +432,7 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size) * for the new COTable. Initially pin the buffer object to make sure * we can use tryreserve without failure. */ - ret = vmw_bo_create(dev_priv, &bo_params, &buf); + ret = vmw_gem_object_create(dev_priv, &bo_params, &buf); if (ret) { DRM_ERROR("Failed initializing new cotable MOB.\n"); goto out_done; @@ -502,7 +502,7 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size) vmw_resource_mob_attach(res); /* Let go of the old mob. */ - vmw_bo_unreference(&old_buf); + vmw_user_bo_unref(&old_buf); res->id = vcotbl->type; ret = dma_resv_reserve_fences(bo->base.resv, 1); @@ -521,7 +521,7 @@ out_map_new: out_wait: ttm_bo_unpin(bo); ttm_bo_unreserve(bo); - vmw_bo_unreference(&buf); + vmw_user_bo_unref(&buf); out_done: MKS_STAT_TIME_POP(MKSSTAT_KERN_COTABLE_RESIZE); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 58bfdf203eca..3cd5090dedfc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -853,6 +853,10 @@ static inline bool vmw_resource_mob_attached(const struct vmw_resource *res) /** * GEM related functionality - vmwgfx_gem.c */ +struct vmw_bo_params; +int vmw_gem_object_create(struct vmw_private *vmw, + struct vmw_bo_params *params, + struct vmw_bo **p_vbo); extern int vmw_gem_object_create_with_handle(struct vmw_private *dev_priv, struct drm_file *filp, uint32_t size, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index cc3f301ca163..36987ef3fc30 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1151,7 +1151,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, SVGAMobId *id, struct vmw_bo **vmw_bo_p) { - struct vmw_bo *vmw_bo; + struct vmw_bo *vmw_bo, *tmp_bo; uint32_t handle = *id; struct vmw_relocation *reloc; int ret; @@ -1164,7 +1164,8 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, } vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_MOB, VMW_BO_DOMAIN_MOB); ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo); - vmw_user_bo_unref(vmw_bo); + tmp_bo = vmw_bo; + vmw_user_bo_unref(&tmp_bo); if (unlikely(ret != 0)) return ret; @@ -1206,7 +1207,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, SVGAGuestPtr *ptr, struct vmw_bo **vmw_bo_p) { - struct vmw_bo *vmw_bo; + struct vmw_bo *vmw_bo, *tmp_bo; uint32_t handle = ptr->gmrId; struct vmw_relocation *reloc; int ret; @@ -1220,7 +1221,8 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM); ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo); - vmw_user_bo_unref(vmw_bo); + tmp_bo = vmw_bo; + vmw_user_bo_unref(&tmp_bo); if (unlikely(ret != 0)) return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index c0da89e16e6f..8b1eb0061610 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -111,6 +111,20 @@ static const struct drm_gem_object_funcs vmw_gem_object_funcs = { .vm_ops = &vmw_vm_ops, }; +int vmw_gem_object_create(struct vmw_private *vmw, + struct vmw_bo_params *params, + struct vmw_bo **p_vbo) +{ + int ret = vmw_bo_create(vmw, params, p_vbo); + + if (ret != 0) + goto out_no_bo; + + (*p_vbo)->tbo.base.funcs = &vmw_gem_object_funcs; +out_no_bo: + return ret; +} + int vmw_gem_object_create_with_handle(struct vmw_private *dev_priv, struct drm_file *filp, uint32_t size, @@ -126,12 +140,10 @@ int vmw_gem_object_create_with_handle(struct vmw_private *dev_priv, .pin = false }; - ret = vmw_bo_create(dev_priv, ¶ms, p_vbo); + ret = vmw_gem_object_create(dev_priv, ¶ms, p_vbo); if (ret != 0) goto out_no_bo; - (*p_vbo)->tbo.base.funcs = &vmw_gem_object_funcs; - ret = drm_gem_handle_create(filp, &(*p_vbo)->tbo.base, handle); out_no_bo: return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 1489ad73c103..818b7f109f53 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1471,8 +1471,8 @@ static int vmw_create_bo_proxy(struct drm_device *dev, /* Reserve and switch the backing mob. */ mutex_lock(&res->dev_priv->cmdbuf_mutex); (void) vmw_resource_reserve(res, false, true); - vmw_bo_unreference(&res->guest_memory_bo); - res->guest_memory_bo = vmw_bo_reference(bo_mob); + vmw_user_bo_unref(&res->guest_memory_bo); + res->guest_memory_bo = vmw_user_bo_ref(bo_mob); res->guest_memory_offset = 0; vmw_resource_unreserve(res, false, false, false, NULL, 0); mutex_unlock(&res->dev_priv->cmdbuf_mutex); @@ -1666,7 +1666,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, err_out: /* vmw_user_lookup_handle takes one ref so does new_fb */ if (bo) - vmw_user_bo_unref(bo); + vmw_user_bo_unref(&bo); if (surface) vmw_surface_unreference(&surface); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c index fb85f244c3d0..c45b4724e414 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c @@ -451,7 +451,7 @@ int vmw_overlay_ioctl(struct drm_device *dev, void *data, ret = vmw_overlay_update_stream(dev_priv, buf, arg, true); - vmw_user_bo_unref(buf); + vmw_user_bo_unref(&buf); out_unlock: mutex_unlock(&overlay->mutex); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 71eeabf001c8..ca300c7427d2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -141,7 +141,7 @@ static void vmw_resource_release(struct kref *kref) if (res->coherent) vmw_bo_dirty_release(res->guest_memory_bo); ttm_bo_unreserve(bo); - vmw_bo_unreference(&res->guest_memory_bo); + vmw_user_bo_unref(&res->guest_memory_bo); } if (likely(res->hw_destroy != NULL)) { @@ -338,7 +338,7 @@ static int vmw_resource_buf_alloc(struct vmw_resource *res, return 0; } - ret = vmw_bo_create(res->dev_priv, &bo_params, &gbo); + ret = vmw_gem_object_create(res->dev_priv, &bo_params, &gbo); if (unlikely(ret != 0)) goto out_no_bo; @@ -457,11 +457,11 @@ void vmw_resource_unreserve(struct vmw_resource *res, vmw_resource_mob_detach(res); if (res->coherent) vmw_bo_dirty_release(res->guest_memory_bo); - vmw_bo_unreference(&res->guest_memory_bo); + vmw_user_bo_unref(&res->guest_memory_bo); } if (new_guest_memory_bo) { - res->guest_memory_bo = vmw_bo_reference(new_guest_memory_bo); + res->guest_memory_bo = vmw_user_bo_ref(new_guest_memory_bo); /* * The validation code should already have added a @@ -551,7 +551,7 @@ out_no_reserve: ttm_bo_put(val_buf->bo); val_buf->bo = NULL; if (guest_memory_dirty) - vmw_bo_unreference(&res->guest_memory_bo); + vmw_user_bo_unref(&res->guest_memory_bo); return ret; } @@ -727,7 +727,7 @@ int vmw_resource_validate(struct vmw_resource *res, bool intr, goto out_no_validate; else if (!res->func->needs_guest_memory && res->guest_memory_bo) { WARN_ON_ONCE(vmw_resource_mob_attached(res)); - vmw_bo_unreference(&res->guest_memory_bo); + vmw_user_bo_unref(&res->guest_memory_bo); } return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index 1e81ff2422cf..a01ca3226d0a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -180,7 +180,7 @@ static int vmw_gb_shader_init(struct vmw_private *dev_priv, res->guest_memory_size = size; if (byte_code) { - res->guest_memory_bo = vmw_bo_reference(byte_code); + res->guest_memory_bo = vmw_user_bo_ref(byte_code); res->guest_memory_offset = offset; } shader->size = size; @@ -809,7 +809,7 @@ static int vmw_shader_define(struct drm_device *dev, struct drm_file *file_priv, shader_type, num_input_sig, num_output_sig, tfile, shader_handle); out_bad_arg: - vmw_user_bo_unref(buffer); + vmw_user_bo_unref(&buffer); return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 5db403ee8261..3829be282ff0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -686,9 +686,6 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) container_of(base, struct vmw_user_surface, prime.base); struct vmw_resource *res = &user_srf->srf.res; - if (res->guest_memory_bo) - drm_gem_object_put(&res->guest_memory_bo->tbo.base); - *p_base = NULL; vmw_resource_unreference(&res); } @@ -855,23 +852,21 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, * expect a backup buffer to be present. */ if (dev_priv->has_mob && req->shareable) { - uint32_t backup_handle; - - ret = vmw_gem_object_create_with_handle(dev_priv, - file_priv, - res->guest_memory_size, - &backup_handle, - &res->guest_memory_bo); + struct vmw_bo_params params = { + .domain = VMW_BO_DOMAIN_SYS, + .busy_domain = VMW_BO_DOMAIN_SYS, + .bo_type = ttm_bo_type_device, + .size = res->guest_memory_size, + .pin = false + }; + + ret = vmw_gem_object_create(dev_priv, + ¶ms, + &res->guest_memory_bo); if (unlikely(ret != 0)) { vmw_resource_unreference(&res); goto out_unlock; } - vmw_bo_reference(res->guest_memory_bo); - /* - * We don't expose the handle to the userspace and surface - * already holds a gem reference - */ - drm_gem_handle_delete(file_priv, backup_handle); } tmp = vmw_resource_reference(&srf->res); @@ -1512,7 +1507,7 @@ vmw_gb_surface_define_internal(struct drm_device *dev, if (ret == 0) { if (res->guest_memory_bo->tbo.base.size < res->guest_memory_size) { VMW_DEBUG_USER("Surface backup buffer too small.\n"); - vmw_bo_unreference(&res->guest_memory_bo); + vmw_user_bo_unref(&res->guest_memory_bo); ret = -EINVAL; goto out_unlock; } else { @@ -1526,8 +1521,6 @@ vmw_gb_surface_define_internal(struct drm_device *dev, res->guest_memory_size, &backup_handle, &res->guest_memory_bo); - if (ret == 0) - vmw_bo_reference(res->guest_memory_bo); } if (unlikely(ret != 0)) { -- cgit v1.2.3 From 23645bca98304a2772f0de96f97370dd567d0ae6 Mon Sep 17 00:00:00 2001 From: Daniel Miess Date: Fri, 29 Sep 2023 13:04:33 -0400 Subject: drm/amd/display: Don't set dpms_off for seamless boot [Why] eDPs fail to light up with seamless boot enabled [How] When seamless boot is enabled don't configure dpms_off in disable_vbios_mode_if_required. Reviewed-by: Charlene Liu Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Tom Chung Signed-off-by: Daniel Miess Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 3a9077b60029..d08e60dff46d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1262,6 +1262,9 @@ static void disable_vbios_mode_if_required( if (stream == NULL) continue; + if (stream->apply_seamless_boot_optimization) + continue; + // only looking for first odm pipe if (pipe->prev_odm_pipe) continue; -- cgit v1.2.3 From ff89f064dca38e2203790bf876cc7756b8ab2961 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 6 Oct 2023 14:04:04 +0200 Subject: drm/amdgpu: add missing NULL check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bo->tbo.resource can easily be NULL here. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2902 Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher CC: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index f3ee83cdf97e..d28e21baef16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -252,7 +252,7 @@ static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo) struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct amdgpu_res_cursor cursor; - if (bo->tbo.resource->mem_type != TTM_PL_VRAM) + if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM) return false; amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); -- cgit v1.2.3 From 3806a8c64794661b15ff5ed28180ff9a5f79fce8 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sun, 8 Oct 2023 14:46:49 +0800 Subject: drm/amdgpu: fix SI failure due to doorbells allocation SI hardware does not have doorbells at all, however currently the code will try to do the allocation and thus fail, makes SI AMDGPU not usable. Fix this failure by skipping doorbells allocation when doorbells count is zero. Fixes: 54c30d2a8def ("drm/amdgpu: create kernel doorbell pages") Reviewed-by: Shashank Sharma Signed-off-by: Icenowy Zheng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c index da4be0bbb446..8eee5d783a92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c @@ -142,6 +142,10 @@ int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev) int r; int size; + /* SI HW does not have doorbells, skip allocation */ + if (adev->doorbell.num_kernel_doorbells == 0) + return 0; + /* Reserve first num_kernel_doorbells (page-aligned) for kernel ops */ size = ALIGN(adev->doorbell.num_kernel_doorbells * sizeof(u32), PAGE_SIZE); -- cgit v1.2.3 From a20c4350c6a12405b7f732b3ee6801ffe2cc45ce Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 3 Oct 2023 10:20:02 +0800 Subject: scsi: ufs: core: Correct clear TM error log The clear TM function error log status was inverted. Fixes: 4693fad7d6d4 ("scsi: ufs: core: Log error handler activity") Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20231003022002.25578-1-peter.wang@mediatek.com Reviewed-by: Bart Van Assche Reviewed-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index c2df07545f96..8382e8cfa414 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6895,7 +6895,7 @@ static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag) mask, 0, 1000, 1000); dev_err(hba->dev, "Clearing task management function with tag %d %s\n", - tag, err ? "succeeded" : "failed"); + tag, err < 0 ? "failed" : "succeeded"); out: return err; -- cgit v1.2.3 From dad4e491e30b20f4dc615c9da65d2142d703b5c2 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Sat, 7 Oct 2023 08:59:53 +0800 Subject: net: ipv6: fix return value check in esp_remove_trailer In esp_remove_trailer(), to avoid an unexpected result returned by pskb_trim, we should check the return value of pskb_trim(). Signed-off-by: Ma Ke Signed-off-by: Steffen Klassert --- net/ipv6/esp6.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index fddd0cbdede1..e023d29e919c 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -770,7 +770,9 @@ static inline int esp_remove_trailer(struct sk_buff *skb) skb->csum = csum_block_sub(skb->csum, csumdiff, skb->len - trimlen); } - pskb_trim(skb, skb->len - trimlen); + ret = pskb_trim(skb, skb->len - trimlen); + if (unlikely(ret)) + return ret; ret = nexthdr[1]; -- cgit v1.2.3 From 513f61e2193350c7a345da98559b80f61aec4fa6 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Mon, 9 Oct 2023 09:13:37 +0800 Subject: net: ipv4: fix return value check in esp_remove_trailer In esp_remove_trailer(), to avoid an unexpected result returned by pskb_trim, we should check the return value of pskb_trim(). Signed-off-by: Ma Ke Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 2be2d4922557..d18f0f092fe7 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -732,7 +732,9 @@ static inline int esp_remove_trailer(struct sk_buff *skb) skb->csum = csum_block_sub(skb->csum, csumdiff, skb->len - trimlen); } - pskb_trim(skb, skb->len - trimlen); + ret = pskb_trim(skb, skb->len - trimlen); + if (unlikely(ret)) + return ret; ret = nexthdr[1]; -- cgit v1.2.3 From 258dd5e6e65995ee85a941eed9a06708a36b1bfe Mon Sep 17 00:00:00 2001 From: Ruihai Zhou Date: Sat, 7 Oct 2023 14:49:49 +0800 Subject: drm/panel: boe-tv101wum-nl6: Completely pull GPW to VGL before TP term The sta_himax83102 panel sometimes shows abnormally flickering horizontal lines. The front gate output will precharge the X point of the next pole circuit before TP(TouchPanel Enable) term starts, and wait until the end of the TP term to resume the CLK. For this reason, the X point must be maintained during the TP term. In abnormal case, we measured a slight leakage at point X. This because during the TP term, the GPW does not fully pull the VGL low, causing the TFT to not be closed tightly. To fix this, we completely pull GPW to VGL before entering the TP term. This will ensure that the TFT is closed tightly and prevent the abnormal display. Fixes: 1bc2ef065f13 ("drm/panel: Support for Starry-himax83102-j02 TDDI MIPI-DSI panel") Signed-off-by: Ruihai Zhou Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20231007064949.22668-1-zhouruihai@huaqin.corp-partner.google.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231007064949.22668-1-zhouruihai@huaqin.corp-partner.google.com --- drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index 5ac926281d2c..c9087f474cbc 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -1342,9 +1342,7 @@ static const struct panel_init_cmd starry_himax83102_j02_init_cmd[] = { _INIT_DCS_CMD(0xB1, 0x01, 0xBF, 0x11), _INIT_DCS_CMD(0xCB, 0x86), _INIT_DCS_CMD(0xD2, 0x3C, 0xFA), - _INIT_DCS_CMD(0xE9, 0xC5), - _INIT_DCS_CMD(0xD3, 0x00, 0x00, 0x00, 0x00, 0x80, 0x0C, 0x01), - _INIT_DCS_CMD(0xE9, 0x3F), + _INIT_DCS_CMD(0xD3, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x0C, 0x01), _INIT_DCS_CMD(0xE7, 0x02, 0x00, 0x28, 0x01, 0x7E, 0x0F, 0x7E, 0x10, 0xA0, 0x00, 0x00, 0x20, 0x40, 0x50, 0x40), _INIT_DCS_CMD(0xBD, 0x02), _INIT_DCS_CMD(0xD8, 0xFF, 0xFF, 0xBF, 0xFE, 0xAA, 0xA0, 0xFF, 0xFF, 0xBF, 0xFE, 0xAA, 0xA0), -- cgit v1.2.3 From 8e8a12ecbc86700b5e1a3596ce2b3c43dafad336 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 17:55:51 +0200 Subject: powerpc/85xx: Fix math emulation exception Booting mpc85xx_defconfig kernel on QEMU leads to: Bad trap at PC: fe9bab0, SR: 2d000, vector=800 awk[82]: unhandled trap (5) at 0 nip fe9bab0 lr fe9e01c code 5 in libc-2.27.so[fe5a000+17a000] awk[82]: code: 3aa00000 3a800010 4bffe03c 9421fff0 7ca62b78 38a00000 93c10008 83c10008 awk[82]: code: 38210010 4bffdec8 9421ffc0 7c0802a6 d8010008 4815190d 93810030 Trace/breakpoint trap WARNING: no useful console This is because allthough CONFIG_MATH_EMULATION is selected, Exception 800 calls unknown_exception(). Call emulation_assist_interrupt() instead. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/066caa6d9480365da9b8ed83692d7101e10ac5f8.1695657339.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_85xx.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index 97e9ea0c7297..0f1641a31250 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -395,7 +395,7 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else - EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, unknown_exception) + EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, emulation_assist_interrupt) #endif /* System Call Interrupt */ -- cgit v1.2.3 From 2b7947bd32e243c52870d54141d3b4ea6775e63d Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Thu, 5 Oct 2023 13:16:32 +0000 Subject: drm/atomic-helper: relax unregistered connector check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver might pull connectors which weren't submitted by user-space into the atomic state. For instance, intel_dp_mst_atomic_master_trans_check() pulls in connectors sharing the same DP-MST stream. However, if the connector is unregistered, this later fails with: [ 559.425658] i915 0000:00:02.0: [drm:drm_atomic_helper_check_modeset] [CONNECTOR:378:DP-7] is not registered Skip the unregistered connector check to allow user-space to turn off connectors one-by-one. See this wlroots issue: https://gitlab.freedesktop.org/wlroots/wlroots/-/issues/3407 Previous discussion: https://lore.kernel.org/intel-gfx/Y6GX7z17WmDSKwta@ideak-desk.fi.intel.com/ Signed-off-by: Simon Ser Cc: stable@vger.kernel.org Reviewed-by: Ville Syrjälä Reviewed-by: Lyude Paul Cc: Jani Nikula Cc: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20231005131623.114379-1-contact@emersion.fr --- drivers/gpu/drm/drm_atomic_helper.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 292e38eb6218..60794fcde1d5 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -290,7 +290,8 @@ static int update_connector_routing(struct drm_atomic_state *state, struct drm_connector *connector, struct drm_connector_state *old_connector_state, - struct drm_connector_state *new_connector_state) + struct drm_connector_state *new_connector_state, + bool added_by_user) { const struct drm_connector_helper_funcs *funcs; struct drm_encoder *new_encoder; @@ -339,9 +340,13 @@ update_connector_routing(struct drm_atomic_state *state, * there's a chance the connector may have been destroyed during the * process, but it's better to ignore that then cause * drm_atomic_helper_resume() to fail. + * + * Last, we want to ignore connector registration when the connector + * was not pulled in the atomic state by user-space (ie, was pulled + * in by the driver, e.g. when updating a DP-MST stream). */ if (!state->duplicated && drm_connector_is_unregistered(connector) && - crtc_state->active) { + added_by_user && crtc_state->active) { drm_dbg_atomic(connector->dev, "[CONNECTOR:%d:%s] is not registered\n", connector->base.id, connector->name); @@ -620,7 +625,10 @@ drm_atomic_helper_check_modeset(struct drm_device *dev, struct drm_connector *connector; struct drm_connector_state *old_connector_state, *new_connector_state; int i, ret; - unsigned int connectors_mask = 0; + unsigned int connectors_mask = 0, user_connectors_mask = 0; + + for_each_oldnew_connector_in_state(state, connector, old_connector_state, new_connector_state, i) + user_connectors_mask |= BIT(i); for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { bool has_connectors = @@ -685,7 +693,8 @@ drm_atomic_helper_check_modeset(struct drm_device *dev, */ ret = update_connector_routing(state, connector, old_connector_state, - new_connector_state); + new_connector_state, + BIT(i) & user_connectors_mask); if (ret) return ret; if (old_connector_state->crtc) { -- cgit v1.2.3 From f0eee815babed70a749d2496a7678be5b45b4c14 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 10 Oct 2023 22:47:50 +1100 Subject: powerpc/47x: Fix 47x syscall return crash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eddie reported that newer kernels were crashing during boot on his 476 FSP2 system: kernel tried to execute user page (b7ee2000) - exploit attempt? (uid: 0) BUG: Unable to handle kernel instruction fetch Faulting instruction address: 0xb7ee2000 Oops: Kernel access of bad area, sig: 11 [#1] BE PAGE_SIZE=4K FSP-2 Modules linked in: CPU: 0 PID: 61 Comm: mount Not tainted 6.1.55-d23900f.ppcnf-fsp2 #1 Hardware name: ibm,fsp2 476fpe 0x7ff520c0 FSP-2 NIP:  b7ee2000 LR: 8c008000 CTR: 00000000 REGS: bffebd83 TRAP: 0400   Not tainted (6.1.55-d23900f.ppcnf-fs p2) MSR:  00000030   CR: 00001000  XER: 20000000 GPR00: c00110ac bffebe63 bffebe7e bffebe88 8c008000 00001000 00000d12 b7ee2000 GPR08: 00000033 00000000 00000000 c139df10 48224824 1016c314 10160000 00000000 GPR16: 10160000 10160000 00000008 00000000 10160000 00000000 10160000 1017f5b0 GPR24: 1017fa50 1017f4f0 1017fa50 1017f740 1017f630 00000000 00000000 1017f4f0 NIP [b7ee2000] 0xb7ee2000 LR [8c008000] 0x8c008000 Call Trace: Instruction dump: XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX ---[ end trace 0000000000000000 ]--- The problem is in ret_from_syscall where the check for icache_44x_need_flush is done. When the flush is needed the code jumps out-of-line to do the flush, and then intends to jump back to continue the syscall return. However the branch back to label 1b doesn't return to the correct location, instead branching back just prior to the return to userspace, causing bogus register values to be used by the rfi. The breakage was introduced by commit 6f76a01173cc ("powerpc/syscall: implement system call entry/exit logic in C for PPC32") which inadvertently removed the "1" label and reused it elsewhere. Fix it by adding named local labels in the correct locations. Note that the return label needs to be outside the ifdef so that CONFIG_PPC_47x=n compiles. Fixes: 6f76a01173cc ("powerpc/syscall: implement system call entry/exit logic in C for PPC32") Cc: stable@vger.kernel.org # v5.12+ Reported-by: Eddie James Tested-by: Eddie James Link: https://lore.kernel.org/linuxppc-dev/fdaadc46-7476-9237-e104-1d2168526e72@linux.ibm.com/ Signed-off-by: Michael Ellerman Reviewed-by: Christophe Leroy Link: https://msgid.link/20231010114750.847794-1-mpe@ellerman.id.au --- arch/powerpc/kernel/entry_32.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 9692acb0361f..7eda33a24bb4 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -137,8 +137,9 @@ ret_from_syscall: lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 - bne- 2f + bne- .L44x_icache_flush #endif /* CONFIG_PPC_47x */ +.L44x_icache_flush_return: kuep_unlock lwz r4,_LINK(r1) lwz r5,_CCR(r1) @@ -172,10 +173,11 @@ syscall_exit_finish: b 1b #ifdef CONFIG_44x -2: li r7,0 +.L44x_icache_flush: + li r7,0 iccci r0,r0 stw r7,icache_44x_need_flush@l(r4) - b 1b + b .L44x_icache_flush_return #endif /* CONFIG_44x */ .globl ret_from_fork -- cgit v1.2.3 From f454b18e07f518bcd0c05af17a2239138bff52de Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Sat, 7 Oct 2023 12:57:02 +0200 Subject: x86/cpu: Fix AMD erratum #1485 on Zen4-based CPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix erratum #1485 on Zen4 parts where running with STIBP disabled can cause an #UD exception. The performance impact of the fix is negligible. Reported-by: René Rebe Signed-off-by: Borislav Petkov (AMD) Tested-by: René Rebe Cc: Link: https://lore.kernel.org/r/D99589F4-BC5D-430B-87B2-72C20370CF57@exactcode.com --- arch/x86/include/asm/msr-index.h | 9 +++++++-- arch/x86/kernel/cpu/amd.c | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1d111350197f..b37abb55e948 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -637,12 +637,17 @@ /* AMD Last Branch Record MSRs */ #define MSR_AMD64_LBR_SELECT 0xc000010e -/* Fam 17h MSRs */ -#define MSR_F17H_IRPERF 0xc00000e9 +/* Zen4 */ +#define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 +/* Zen 2 */ #define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 #define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) +/* Fam 17h MSRs */ +#define MSR_F17H_IRPERF 0xc00000e9 + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 03ef962a6992..ece2b5b7b0fe 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -80,6 +80,10 @@ static const int amd_div0[] = AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf), AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf)); +static const int amd_erratum_1485[] = + AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf), + AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf)); + static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) { int osvw_id = *erratum++; @@ -1149,6 +1153,10 @@ static void init_amd(struct cpuinfo_x86 *c) pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); setup_force_cpu_bug(X86_BUG_DIV0); } + + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && + cpu_has_amd_erratum(c, amd_erratum_1485)) + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT); } #ifdef CONFIG_X86_32 -- cgit v1.2.3 From 89434b069e460967624903b049e5cf5c9e6b99b9 Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Mon, 9 Oct 2023 21:00:58 +0000 Subject: usb: typec: altmodes/displayport: Signal hpd low when exiting mode Upon receiving an ACK for a sent EXIT_MODE message, the DisplayPort driver currently resets the status and configuration of the port partner. The hpd signal is not updated despite being part of the status, so the Display stack can still transmit video despite typec_altmode_exit placing the lanes in a Safe State. Set hpd to low when a sent EXIT_MODE message is ACK'ed. Fixes: 0e3bb7d6894d ("usb: typec: Add driver for DisplayPort alternate mode") Cc: stable@vger.kernel.org Signed-off-by: RD Babiera Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20231009210057.3773877-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/displayport.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index 426c88a516e5..59e0218a8bc5 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -304,6 +304,11 @@ static int dp_altmode_vdm(struct typec_altmode *alt, typec_altmode_update_active(alt, false); dp->data.status = 0; dp->data.conf = 0; + if (dp->hpd) { + drm_connector_oob_hotplug_event(dp->connector_fwnode); + dp->hpd = false; + sysfs_notify(&dp->alt->dev.kobj, "displayport", "hpd"); + } break; case DP_CMD_STATUS_UPDATE: dp->data.status = *vdo; -- cgit v1.2.3 From dddb91cde52b4a57fa06a332b230fca3b11b885f Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 10 Oct 2023 17:17:49 +0300 Subject: usb: typec: ucsi: Fix missing link removal The link between the partner device and its USB Power Delivery instance was never removed which prevented the device from being released. Removing the link always when the partner is unregistered. Fixes: b04e1747fbcc ("usb: typec: ucsi: Register USB Power Delivery Capabilities") Cc: stable Reported-by: Douglas Gilbert Closes: https://lore.kernel.org/linux-usb/ZSUMXdw9nanHtnw2@kuha.fi.intel.com/ Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20231010141749.3912016-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 509c67c94a70..61b64558f96c 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -787,6 +787,7 @@ static void ucsi_unregister_partner(struct ucsi_connector *con) typec_set_mode(con->port, TYPEC_STATE_SAFE); + typec_partner_set_usb_power_delivery(con->partner, NULL); ucsi_unregister_partner_pdos(con); ucsi_unregister_altmodes(con, UCSI_RECIPIENT_SOP); typec_unregister_partner(con->partner); -- cgit v1.2.3 From c9ca8de2eb15f9da24113e652980c61f95a47530 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Oct 2023 13:46:43 -0500 Subject: usb: typec: ucsi: Use GET_CAPABILITY attributes data to set power supply scope On some OEM systems, adding a W7900 dGPU triggers RAS errors and hangs at a black screen on startup. This issue occurs only if `ucsi_acpi` has loaded before `amdgpu` has loaded. The reason for this failure is that `amdgpu` uses power_supply_is_system_supplied() to determine if running on AC or DC power at startup. If this value is reported incorrectly the dGPU will also be programmed incorrectly and trigger errors. power_supply_is_system_supplied() reports the wrong value because UCSI power supplies provided as part of the system don't properly report the scope as "DEVICE" scope (not powering the system). In order to fix this issue check the capabilities reported from the UCSI power supply to ensure that it supports charging a battery and that it can be powered by AC. Mark the scope accordingly. Cc: stable@vger.kernel.org Fixes: a7fbfd44c020 ("usb: typec: ucsi: Mark dGPUs as DEVICE scope") Link: https://www.intel.com/content/www/us/en/products/docs/io/universal-serial-bus/usb-type-c-ucsi-spec.html p28 Reviewed-by: Sebastian Reichel Signed-off-by: Mario Limonciello Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20231009184643.129986-1-mario.limonciello@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/psy.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/typec/ucsi/psy.c b/drivers/usb/typec/ucsi/psy.c index 384b42267f1f..b35c6e07911e 100644 --- a/drivers/usb/typec/ucsi/psy.c +++ b/drivers/usb/typec/ucsi/psy.c @@ -37,6 +37,15 @@ static int ucsi_psy_get_scope(struct ucsi_connector *con, struct device *dev = con->ucsi->dev; device_property_read_u8(dev, "scope", &scope); + if (scope == POWER_SUPPLY_SCOPE_UNKNOWN) { + u32 mask = UCSI_CAP_ATTR_POWER_AC_SUPPLY | + UCSI_CAP_ATTR_BATTERY_CHARGING; + + if (con->ucsi->cap.attributes & mask) + scope = POWER_SUPPLY_SCOPE_SYSTEM; + else + scope = POWER_SUPPLY_SCOPE_DEVICE; + } val->intval = scope; return 0; } -- cgit v1.2.3 From c4dd854f740c21ae8dd9903fc67969c5497cb14b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 25 Sep 2023 17:28:39 +0100 Subject: cpu-hotplug: Provide prototypes for arch CPU registration Provide common prototypes for arch_register_cpu() and arch_unregister_cpu(). These are called by acpi_processor.c, with weak versions, so the prototype for this is already set. It is generally not necessary for function prototypes to be conditional on preprocessor macros. Some architectures (e.g. Loongarch) are missing the prototype for this, and rather than add it to Loongarch's asm/cpu.h, do the job once for everyone. Since this covers everyone, remove the now unnecessary prototypes in asm/cpu.h, and therefore remove the 'static' from one of ia64's arch_register_cpu() definitions. [ tglx: Bring back the ia64 part and remove the ACPI prototypes ] Signed-off-by: Russell King (Oracle) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1qkoRr-0088Q8-Da@rmk-PC.armlinux.org.uk --- arch/ia64/include/asm/cpu.h | 5 ----- arch/ia64/kernel/topology.c | 2 +- arch/x86/include/asm/cpu.h | 2 -- arch/x86/kernel/topology.c | 2 +- drivers/acpi/acpi_processor.c | 1 + include/acpi/processor.h | 5 ----- include/linux/cpu.h | 2 ++ 7 files changed, 5 insertions(+), 14 deletions(-) diff --git a/arch/ia64/include/asm/cpu.h b/arch/ia64/include/asm/cpu.h index db125df9e088..642d71675ddb 100644 --- a/arch/ia64/include/asm/cpu.h +++ b/arch/ia64/include/asm/cpu.h @@ -15,9 +15,4 @@ DECLARE_PER_CPU(struct ia64_cpu, cpu_devices); DECLARE_PER_CPU(int, cpu_state); -#ifdef CONFIG_HOTPLUG_CPU -extern int arch_register_cpu(int num); -extern void arch_unregister_cpu(int); -#endif - #endif /* _ASM_IA64_CPU_H_ */ diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 94a848b06f15..741863a187a6 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -59,7 +59,7 @@ void __ref arch_unregister_cpu(int num) } EXPORT_SYMBOL(arch_unregister_cpu); #else -static int __init arch_register_cpu(int num) +int __init arch_register_cpu(int num) { return register_cpu(&sysfs_cpus[num].cpu, num); } diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 3a233ebff712..25050d953eee 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -28,8 +28,6 @@ struct x86_cpu { }; #ifdef CONFIG_HOTPLUG_CPU -extern int arch_register_cpu(int num); -extern void arch_unregister_cpu(int); extern void soft_restart_cpu(void); #endif diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index ca004e2e4469..0bab03130033 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -54,7 +54,7 @@ void arch_unregister_cpu(int num) EXPORT_SYMBOL(arch_unregister_cpu); #else /* CONFIG_HOTPLUG_CPU */ -static int __init arch_register_cpu(int num) +int __init arch_register_cpu(int num) { return register_cpu(&per_cpu(cpu_devices, num).cpu, num); } diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index c711db8a9c33..0f5218e361df 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -12,6 +12,7 @@ #define pr_fmt(fmt) "ACPI: " fmt #include +#include #include #include #include diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 94181fe9780a..3f34ebb27525 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -465,9 +465,4 @@ extern int acpi_processor_ffh_lpi_probe(unsigned int cpu); extern int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi); #endif -#ifdef CONFIG_ACPI_HOTPLUG_CPU -extern int arch_register_cpu(int cpu); -extern void arch_unregister_cpu(int cpu); -#endif - #endif diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 0abd60a7987b..eb768a866fe3 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -80,6 +80,8 @@ extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, const struct attribute_group **groups, const char *fmt, ...); +extern int arch_register_cpu(int cpu); +extern void arch_unregister_cpu(int cpu); #ifdef CONFIG_HOTPLUG_CPU extern void unregister_cpu(struct cpu *cpu); extern ssize_t arch_cpu_probe(const char *, size_t); -- cgit v1.2.3 From 4800021c630210ea0b19434a1fb56ab16385f2b3 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 10 Oct 2023 12:24:58 +0200 Subject: media: subdev: Don't report V4L2_SUBDEV_CAP_STREAMS when the streams API is disabled Since the stream API is still experimental it is currently locked away behind the internal, default disabled, v4l2_subdev_enable_streams_api flag. Advertising V4L2_SUBDEV_CAP_STREAMS when the streams API is disabled confuses userspace. E.g. it causes the following libcamera error: ERROR SimplePipeline simple.cpp:1497 Failed to reset routes for /dev/v4l-subdev1: Inappropriate ioctl for device Don't report V4L2_SUBDEV_CAP_STREAMS when the streams API is disabled to avoid problems like this. Reported-by: Dennis Bonke Fixes: 9a6b5bf4c1bb ("media: add V4L2_SUBDEV_CAP_STREAMS") Cc: stable@vger.kernel.org # for >= 6.3 Signed-off-by: Hans de Goede Acked-by: Sakari Ailus Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Hans Verkuil --- drivers/media/v4l2-core/v4l2-subdev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/media/v4l2-core/v4l2-subdev.c b/drivers/media/v4l2-core/v4l2-subdev.c index b92348ad61f6..31752c06d1f0 100644 --- a/drivers/media/v4l2-core/v4l2-subdev.c +++ b/drivers/media/v4l2-core/v4l2-subdev.c @@ -502,6 +502,13 @@ static long subdev_do_ioctl(struct file *file, unsigned int cmd, void *arg, V4L2_SUBDEV_CLIENT_CAP_STREAMS; int rval; + /* + * If the streams API is not enabled, remove V4L2_SUBDEV_CAP_STREAMS. + * Remove this when the API is no longer experimental. + */ + if (!v4l2_subdev_enable_streams_api) + streams_subdev = false; + switch (cmd) { case VIDIOC_SUBDEV_QUERYCAP: { struct v4l2_subdev_capability *cap = arg; -- cgit v1.2.3 From c15cdea517414e0b29a11e0a0e2443d127c9109b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 6 Oct 2023 17:39:34 +0100 Subject: mm: slab: Do not create kmalloc caches smaller than arch_slab_minalign() Commit b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment if DMA bouncing possible") allows architectures with non-coherent DMA to define a small ARCH_KMALLOC_MINALIGN (e.g. sizeof(unsigned long long)) and this has been enabled on arm64. With KASAN_HW_TAGS enabled, however, ARCH_SLAB_MINALIGN becomes 16 on arm64 (arch_slab_minalign() dynamically selects it since commit d949a8155d13 ("mm: make minimum slab alignment a runtime property")). This can lead to a situation where kmalloc-8 caches are attempted to be created with a kmem_caches.size aligned to 16. When the cache is mergeable, it can lead to kernel warnings like: sysfs: cannot create duplicate filename '/kernel/slab/:d-0000016' CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0-rc1-00001-gda98843cd306-dirty #5 Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 Call trace: dump_backtrace+0x90/0xe8 show_stack+0x18/0x24 dump_stack_lvl+0x48/0x60 dump_stack+0x18/0x24 sysfs_warn_dup+0x64/0x80 sysfs_create_dir_ns+0xe8/0x108 kobject_add_internal+0x98/0x264 kobject_init_and_add+0x8c/0xd8 sysfs_slab_add+0x12c/0x248 slab_sysfs_init+0x98/0x14c do_one_initcall+0x6c/0x1b0 kernel_init_freeable+0x1c0/0x288 kernel_init+0x24/0x1e0 ret_from_fork+0x10/0x20 kobject: kobject_add_internal failed for :d-0000016 with -EEXIST, don't try to register things with the same name in the same directory. SLUB: Unable to add boot slab dma-kmalloc-8 to sysfs Limit the __kmalloc_minalign() return value (used to create the kmalloc-* caches) to arch_slab_minalign() so that kmalloc-8 caches are skipped when KASAN_HW_TAGS is enabled (both config and runtime). Reported-by: Mark Rutland Fixes: b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment if DMA bouncing possible") Signed-off-by: Catalin Marinas Cc: Peter Collingbourne Cc: stable@vger.kernel.org # 6.5.x Signed-off-by: Vlastimil Babka --- mm/slab_common.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 8fda308e400d..9bbffe82d65a 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -895,10 +895,13 @@ void __init setup_kmalloc_cache_index_table(void) static unsigned int __kmalloc_minalign(void) { + unsigned int minalign = dma_get_cache_alignment(); + if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && is_swiotlb_allocated()) - return ARCH_KMALLOC_MINALIGN; - return dma_get_cache_alignment(); + minalign = ARCH_KMALLOC_MINALIGN; + + return max(minalign, arch_slab_minalign()); } void __init -- cgit v1.2.3 From 30c1886ab53e89be5a0aa16df47b367957fc4bc2 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 29 Sep 2023 09:31:08 +0300 Subject: media: xilinx-vipp: Look for entities also in waiting_list The big V4L2 async framework overhaul simplified linked lists used by the V4L2 async framework. This affected a few drivers and it turns out a few of those drivers rely on searching for entities in both async notifier's waiting and done lists. Do that by separately traversing both. Fixes: 9bf19fbf0c8b ("media: v4l: async: Rework internal lists") Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Hans Verkuil --- drivers/media/platform/xilinx/xilinx-vipp.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/media/platform/xilinx/xilinx-vipp.c b/drivers/media/platform/xilinx/xilinx-vipp.c index 4285770fde18..996684a73038 100644 --- a/drivers/media/platform/xilinx/xilinx-vipp.c +++ b/drivers/media/platform/xilinx/xilinx-vipp.c @@ -55,11 +55,18 @@ xvip_graph_find_entity(struct xvip_composite_device *xdev, { struct xvip_graph_entity *entity; struct v4l2_async_connection *asd; - - list_for_each_entry(asd, &xdev->notifier.done_list, asc_entry) { - entity = to_xvip_entity(asd); - if (entity->asd.match.fwnode == fwnode) - return entity; + struct list_head *lists[] = { + &xdev->notifier.done_list, + &xdev->notifier.waiting_list + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(lists); i++) { + list_for_each_entry(asd, lists[i], asc_entry) { + entity = to_xvip_entity(asd); + if (entity->asd.match.fwnode == fwnode) + return entity; + } } return NULL; -- cgit v1.2.3 From 1d4c25854aac872e6d3d1dad5dc5eb580d18ac6c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 29 Sep 2023 11:31:33 +0200 Subject: media: ipu-bridge: Add missing acpi_dev_put() in ipu_bridge_get_ivsc_acpi_dev() In ipu_bridge_get_ivsc_acpi_dev(), the "ivsc_adev" acpi_device pointer from the outer loop is handed over to the caller, which takes proper care of its reference count. However, the "consumer" acpi_device pointer from the inner loop is lost, without decrementing its reference count. Fix this by adding the missing call to acpi_dev_put(). Fixes: c66821f381ae ("media: pci: intel: Add IVSC support for IPU bridge driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Andy Shevchenko Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Hans Verkuil --- drivers/media/pci/intel/ipu-bridge.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/pci/intel/ipu-bridge.c b/drivers/media/pci/intel/ipu-bridge.c index 1bde8b6e0b11..e38198e259c0 100644 --- a/drivers/media/pci/intel/ipu-bridge.c +++ b/drivers/media/pci/intel/ipu-bridge.c @@ -107,8 +107,10 @@ static struct acpi_device *ipu_bridge_get_ivsc_acpi_dev(struct acpi_device *adev for_each_acpi_dev_match(ivsc_adev, acpi_id->id, NULL, -1) /* camera sensor depends on IVSC in DSDT if exist */ for_each_acpi_consumer_dev(ivsc_adev, consumer) - if (consumer->handle == handle) + if (consumer->handle == handle) { + acpi_dev_put(consumer); return ivsc_adev; + } } return NULL; -- cgit v1.2.3 From c46f16f156ac58afcf4addc850bb5dfbca77b9fc Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Tue, 10 Oct 2023 09:07:44 +0200 Subject: media: i2c: ov8858: Don't set fwnode in the driver This makes the driver work with the new check in v4l2_async_register_subdev() that was introduced recently in 6.6-rc1. Without this change, probe fails with: ov8858 1-0036: Detected OV8858 sensor, revision 0xb2 ov8858 1-0036: sub-device fwnode is an endpoint! ov8858 1-0036: v4l2 async register subdev failed ov8858: probe of 1-0036 failed with error -22 This also simplifies the driver a bit. Signed-off-by: Ondrej Jirman Reviewed-by: Jacopo Mondi Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Hans Verkuil --- drivers/media/i2c/ov8858.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/media/i2c/ov8858.c b/drivers/media/i2c/ov8858.c index 3af6125a2eee..4d9fd76e2f60 100644 --- a/drivers/media/i2c/ov8858.c +++ b/drivers/media/i2c/ov8858.c @@ -1850,9 +1850,9 @@ static int ov8858_parse_of(struct ov8858 *ov8858) } ret = v4l2_fwnode_endpoint_parse(endpoint, &vep); + fwnode_handle_put(endpoint); if (ret) { dev_err(dev, "Failed to parse endpoint: %d\n", ret); - fwnode_handle_put(endpoint); return ret; } @@ -1864,12 +1864,9 @@ static int ov8858_parse_of(struct ov8858 *ov8858) default: dev_err(dev, "Unsupported number of data lanes %u\n", ov8858->num_lanes); - fwnode_handle_put(endpoint); return -EINVAL; } - ov8858->subdev.fwnode = endpoint; - return 0; } @@ -1913,7 +1910,7 @@ static int ov8858_probe(struct i2c_client *client) ret = ov8858_init_ctrls(ov8858); if (ret) - goto err_put_fwnode; + return ret; sd = &ov8858->subdev; sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE | V4L2_SUBDEV_FL_HAS_EVENTS; @@ -1964,8 +1961,6 @@ err_clean_entity: media_entity_cleanup(&sd->entity); err_free_handler: v4l2_ctrl_handler_free(&ov8858->ctrl_handler); -err_put_fwnode: - fwnode_handle_put(ov8858->subdev.fwnode); return ret; } @@ -1978,7 +1973,6 @@ static void ov8858_remove(struct i2c_client *client) v4l2_async_unregister_subdev(sd); media_entity_cleanup(&sd->entity); v4l2_ctrl_handler_free(&ov8858->ctrl_handler); - fwnode_handle_put(ov8858->subdev.fwnode); pm_runtime_disable(&client->dev); if (!pm_runtime_status_suspended(&client->dev)) -- cgit v1.2.3 From 91d20ab9d9ca035527af503d00e1e30d6c375f2a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 9 Oct 2023 10:18:01 +0200 Subject: wifi: cfg80211: use system_unbound_wq for wiphy work Since wiphy work items can run pretty much arbitrary code in the stack/driver, it can take longer to run all of this, so we shouldn't be using system_wq via schedule_work(). Also, we lock the wiphy (which is the reason this exists), so use system_unbound_wq. Reported-and-tested-by: Kalle Valo Fixes: a3ee4dc84c4e ("wifi: cfg80211: add a work abstraction with special semantics") Signed-off-by: Johannes Berg --- net/wireless/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 64e861617110..acec41c1809a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1622,7 +1622,7 @@ void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work) list_add_tail(&work->entry, &rdev->wiphy_work_list); spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); - schedule_work(&rdev->wiphy_work); + queue_work(system_unbound_wq, &rdev->wiphy_work); } EXPORT_SYMBOL_GPL(wiphy_work_queue); -- cgit v1.2.3 From 02e0e426a2fb1446ebd7bc0eccfb48aedefb966b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Oct 2023 23:09:18 +0200 Subject: wifi: mac80211: fix error path key leak In the previous key leak fix for the other error paths, I meant to unify all of them to the same place, but used the wrong label, which I noticed when doing the merge into wireless-next. Fix it. Fixes: d097ae01ebd4 ("wifi: mac80211: fix potential key leak") Signed-off-by: Johannes Berg --- net/mac80211/key.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 0665ff5e456e..a2db0585dce0 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -912,7 +912,7 @@ int ieee80211_key_link(struct ieee80211_key *key, */ if (ieee80211_key_identical(sdata, old_key, key)) { ret = -EALREADY; - goto unlock; + goto out; } key->local = sdata->local; @@ -940,7 +940,6 @@ int ieee80211_key_link(struct ieee80211_key *key, out: ieee80211_key_free_unused(key); - unlock: mutex_unlock(&sdata->local->key_mtx); return ret; -- cgit v1.2.3 From b2f750c3a80b285cd60c9346f8c96bd0a2a66cde Mon Sep 17 00:00:00 2001 From: Josua Mayer Date: Wed, 4 Oct 2023 18:39:28 +0200 Subject: net: rfkill: gpio: prevent value glitch during probe When either reset- or shutdown-gpio have are initially deasserted, e.g. after a reboot - or when the hardware does not include pull-down, there will be a short toggle of both IOs to logical 0 and back to 1. It seems that the rfkill default is unblocked, so the driver should not glitch to output low during probe. It can lead e.g. to unexpected lte modem reconnect: [1] root@localhost:~# dmesg | grep "usb 2-1" [ 2.136124] usb 2-1: new SuperSpeed USB device number 2 using xhci-hcd [ 21.215278] usb 2-1: USB disconnect, device number 2 [ 28.833977] usb 2-1: new SuperSpeed USB device number 3 using xhci-hcd The glitch has been discovered on an arm64 board, now that device-tree support for the rfkill-gpio driver has finally appeared :). Change the flags for devm_gpiod_get_optional from GPIOD_OUT_LOW to GPIOD_ASIS to avoid any glitches. The rfkill driver will set the intended value during rfkill_sync_work. Fixes: 7176ba23f8b5 ("net: rfkill: add generic gpio rfkill driver") Signed-off-by: Josua Mayer Link: https://lore.kernel.org/r/20231004163928.14609-1-josua@solid-run.com Signed-off-by: Johannes Berg --- net/rfkill/rfkill-gpio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index e9d1b2f2ff0a..5a81505fba9a 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -108,13 +108,13 @@ static int rfkill_gpio_probe(struct platform_device *pdev) rfkill->clk = devm_clk_get(&pdev->dev, NULL); - gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_OUT_LOW); + gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_ASIS); if (IS_ERR(gpio)) return PTR_ERR(gpio); rfkill->reset_gpio = gpio; - gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_OUT_LOW); + gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_ASIS); if (IS_ERR(gpio)) return PTR_ERR(gpio); -- cgit v1.2.3 From f2ac54ebf85615a6d78f5eb213a8bbeeb17ebe5d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 11 Oct 2023 16:55:10 +0200 Subject: net: rfkill: reduce data->mtx scope in rfkill_fop_open In syzbot runs, lockdep reports that there's a (potential) deadlock here of data->mtx being locked recursively. This isn't really a deadlock since they are different instances, but lockdep cannot know, and teaching it would be far more difficult than other fixes. At the same time we don't even really _need_ the mutex to be locked in rfkill_fop_open(), since we're modifying only a completely fresh instance of 'data' (struct rfkill_data) that's not yet added to the global list. However, to avoid any reordering etc. within the globally locked section, and to make the code look more symmetric, we should still lock the data->events list manipulation, but also need to lock _only_ that. So do that. Reported-by: syzbot+509238e523e032442b80@syzkaller.appspotmail.com Fixes: 2c3dfba4cf84 ("rfkill: sync before userspace visibility/changes") Signed-off-by: Johannes Berg --- net/rfkill/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 08630896b6c8..14cc8fe8584b 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -1180,7 +1180,6 @@ static int rfkill_fop_open(struct inode *inode, struct file *file) init_waitqueue_head(&data->read_wait); mutex_lock(&rfkill_global_mutex); - mutex_lock(&data->mtx); /* * start getting events from elsewhere but hold mtx to get * startup events added first @@ -1192,10 +1191,11 @@ static int rfkill_fop_open(struct inode *inode, struct file *file) goto free; rfkill_sync(rfkill); rfkill_fill_event(&ev->ev, rfkill, RFKILL_OP_ADD); + mutex_lock(&data->mtx); list_add_tail(&ev->list, &data->events); + mutex_unlock(&data->mtx); } list_add(&data->list, &rfkill_fds); - mutex_unlock(&data->mtx); mutex_unlock(&rfkill_global_mutex); file->private_data = data; @@ -1203,7 +1203,6 @@ static int rfkill_fop_open(struct inode *inode, struct file *file) return stream_open(inode, file); free: - mutex_unlock(&data->mtx); mutex_unlock(&rfkill_global_mutex); mutex_destroy(&data->mtx); list_for_each_entry_safe(ev, tmp, &data->events, list) -- cgit v1.2.3 From 92d4abd66f7080075793970fc8f241239e58a9e7 Mon Sep 17 00:00:00 2001 From: Arkadiusz Bokowy Date: Wed, 20 Sep 2023 17:30:07 +0200 Subject: Bluetooth: vhci: Fix race when opening vhci device When the vhci device is opened in the two-step way, i.e.: open device then write a vendor packet with requested controller type, the device shall respond with a vendor packet which includes HCI index of created interface. When the virtual HCI is created, the host sends a reset request to the controller. This request is processed by the vhci_send_frame() function. However, this request is send by a different thread, so it might happen that this HCI request will be received before the vendor response is queued in the read queue. This results in the HCI vendor response and HCI reset request inversion in the read queue which leads to improper behavior of btvirt: > dmesg [1754256.640122] Bluetooth: MGMT ver 1.22 [1754263.023806] Bluetooth: MGMT ver 1.22 [1754265.043775] Bluetooth: hci1: Opcode 0x c03 failed: -110 In order to synchronize vhci two-step open/setup process with virtual HCI initialization, this patch adds internal lock when queuing data in the vhci_send_frame() function. Signed-off-by: Arkadiusz Bokowy Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_vhci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 40e2b9fa11a2..f3892e9ce800 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -74,7 +74,10 @@ static int vhci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) struct vhci_data *data = hci_get_drvdata(hdev); memcpy(skb_push(skb, 1), &hci_skb_pkt_type(skb), 1); + + mutex_lock(&data->open_mutex); skb_queue_tail(&data->readq, skb); + mutex_unlock(&data->open_mutex); wake_up_interruptible(&data->read_wait); return 0; -- cgit v1.2.3 From acab8ff29a2a226409cfe04e6d2e0896928c1b3a Mon Sep 17 00:00:00 2001 From: Iulia Tanasescu Date: Thu, 28 Sep 2023 10:52:57 +0300 Subject: Bluetooth: ISO: Fix invalid context error This moves the hci_le_terminate_big_sync call from rx_work to cmd_sync_work, to avoid calling sleeping function from an invalid context. Reported-by: syzbot+c715e1bd8dfbcb1ab176@syzkaller.appspotmail.com Fixes: a0bfde167b50 ("Bluetooth: ISO: Add support for connecting multiple BISes") Signed-off-by: Iulia Tanasescu Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 31d02b54eea1..e6cfc65abcb8 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -7021,6 +7021,14 @@ unlock: hci_dev_unlock(hdev); } +static int hci_iso_term_big_sync(struct hci_dev *hdev, void *data) +{ + u8 handle = PTR_UINT(data); + + return hci_le_terminate_big_sync(hdev, handle, + HCI_ERROR_LOCAL_HOST_TERM); +} + static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -7065,16 +7073,17 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, rcu_read_lock(); } + rcu_read_unlock(); + if (!ev->status && !i) /* If no BISes have been connected for the BIG, * terminate. This is in case all bound connections * have been closed before the BIG creation * has completed. */ - hci_le_terminate_big_sync(hdev, ev->handle, - HCI_ERROR_LOCAL_HOST_TERM); + hci_cmd_sync_queue(hdev, hci_iso_term_big_sync, + UINT_PTR(ev->handle), NULL); - rcu_read_unlock(); hci_dev_unlock(hdev); } -- cgit v1.2.3 From 33155c4aae5260475def6f7438e4e35564f4f3ba Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Sun, 1 Oct 2023 16:59:31 +0800 Subject: Bluetooth: hci_event: Ignore NULL link key This change is used to relieve CVE-2020-26555. The description of the CVE: Bluetooth legacy BR/EDR PIN code pairing in Bluetooth Core Specification 1.0B through 5.2 may permit an unauthenticated nearby device to spoof the BD_ADDR of the peer device to complete pairing without knowledge of the PIN. [1] The detail of this attack is in IEEE paper: BlueMirror: Reflections on Bluetooth Pairing and Provisioning Protocols [2] It's a reflection attack. The paper mentioned that attacker can induce the attacked target to generate null link key (zero key) without PIN code. In BR/EDR, the key generation is actually handled in the controller which is below HCI. Thus, we can ignore null link key in the handler of "Link Key Notification event" to relieve the attack. A similar implementation also shows in btstack project. [3] v3: Drop the connection when null link key be detected. v2: - Used Link: tag instead of Closes: - Used bt_dev_dbg instead of BT_DBG - Added Fixes: tag Cc: stable@vger.kernel.org Fixes: 55ed8ca10f35 ("Bluetooth: Implement link key handling for the management interface") Link: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26555 [1] Link: https://ieeexplore.ieee.org/abstract/document/9474325/authors#authors [2] Link: https://github.com/bluekitchen/btstack/blob/master/src/hci.c#L3722 [3] Signed-off-by: Lee, Chun-Yi Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index e6cfc65abcb8..742dcfd136bc 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4742,6 +4742,15 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, void *data, if (!conn) goto unlock; + /* Ignore NULL link key against CVE-2020-26555 */ + if (!memcmp(ev->link_key, ZERO_KEY, HCI_LINK_KEY_SIZE)) { + bt_dev_dbg(hdev, "Ignore NULL link key (ZERO KEY) for %pMR", + &ev->bdaddr); + hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); + hci_conn_drop(conn); + goto unlock; + } + hci_conn_hold(conn); conn->disc_timeout = HCI_DISCONN_TIMEOUT; hci_conn_drop(conn); -- cgit v1.2.3 From 1ffc6f8cc33268731fcf9629fc4438f6db1191fc Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Sun, 1 Oct 2023 16:59:58 +0800 Subject: Bluetooth: Reject connection with the device which has same BD_ADDR This change is used to relieve CVE-2020-26555. The description of the CVE: Bluetooth legacy BR/EDR PIN code pairing in Bluetooth Core Specification 1.0B through 5.2 may permit an unauthenticated nearby device to spoof the BD_ADDR of the peer device to complete pairing without knowledge of the PIN. [1] The detail of this attack is in IEEE paper: BlueMirror: Reflections on Bluetooth Pairing and Provisioning Protocols [2] It's a reflection attack. The paper mentioned that attacker can induce the attacked target to generate null link key (zero key) without PIN code. In BR/EDR, the key generation is actually handled in the controller which is below HCI. A condition of this attack is that attacker should change the BR_ADDR of his hacking device (Host B) to equal to the BR_ADDR with the target device being attacked (Host A). Thus, we reject the connection with device which has same BD_ADDR both on HCI_Create_Connection and HCI_Connection_Request to prevent the attack. A similar implementation also shows in btstack project. [3][4] Cc: stable@vger.kernel.org Link: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26555 [1] Link: https://ieeexplore.ieee.org/abstract/document/9474325/authors#authors [2] Link: https://github.com/bluekitchen/btstack/blob/master/src/hci.c#L3523 [3] Link: https://github.com/bluekitchen/btstack/blob/master/src/hci.c#L7297 [4] Signed-off-by: Lee, Chun-Yi Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 9 +++++++++ net/bluetooth/hci_event.c | 11 +++++++++++ 2 files changed, 20 insertions(+) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 7a6f20338db8..73470cc3518a 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1627,6 +1627,15 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, return ERR_PTR(-EOPNOTSUPP); } + /* Reject outgoing connection to device with same BD ADDR against + * CVE-2020-26555 + */ + if (!bacmp(&hdev->bdaddr, dst)) { + bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n", + dst); + return ERR_PTR(-ECONNREFUSED); + } + acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { acl = hci_conn_add(hdev, ACL_LINK, dst, HCI_ROLE_MASTER); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 742dcfd136bc..61b1b244595e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3268,6 +3268,17 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "bdaddr %pMR type 0x%x", &ev->bdaddr, ev->link_type); + /* Reject incoming connection from device with same BD ADDR against + * CVE-2020-26555 + */ + if (!bacmp(&hdev->bdaddr, &ev->bdaddr)) + { + bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n", + &ev->bdaddr); + hci_reject_conn(hdev, &ev->bdaddr); + return; + } + mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type, &flags); -- cgit v1.2.3 From a239110ee8e0b0aafa265f0d54f7a16744855e70 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 30 Sep 2023 15:53:32 +0300 Subject: Bluetooth: hci_sync: always check if connection is alive before deleting In hci_abort_conn_sync it is possible that conn is deleted concurrently by something else, also e.g. when waiting for hdev->lock. This causes double deletion of the conn, so UAF or conn_hash.list corruption. Fix by having all code paths check that the connection is still in conn_hash before deleting it, while holding hdev->lock which prevents any races. Log (when powering off while BAP streaming, occurs rarely): ======================================================================= kernel BUG at lib/list_debug.c:56! ... ? __list_del_entry_valid (lib/list_debug.c:56) hci_conn_del (net/bluetooth/hci_conn.c:154) bluetooth hci_abort_conn_sync (net/bluetooth/hci_sync.c:5415) bluetooth ? __pfx_hci_abort_conn_sync+0x10/0x10 [bluetooth] ? lock_release+0x1d5/0x3c0 ? hci_disconnect_all_sync.constprop.0+0xb2/0x230 [bluetooth] ? __pfx_lock_release+0x10/0x10 ? __kmem_cache_free+0x14d/0x2e0 hci_disconnect_all_sync.constprop.0+0xda/0x230 [bluetooth] ? __pfx_hci_disconnect_all_sync.constprop.0+0x10/0x10 [bluetooth] ? hci_clear_adv_sync+0x14f/0x170 [bluetooth] ? __pfx_set_powered_sync+0x10/0x10 [bluetooth] hci_set_powered_sync+0x293/0x450 [bluetooth] ======================================================================= Fixes: 94d9ba9f9888 ("Bluetooth: hci_sync: Fix UAF in hci_disconnect_all_sync") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index d06e07a0ea5a..a15ab0b874a9 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5369,6 +5369,7 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { int err = 0; u16 handle = conn->handle; + bool disconnect = false; struct hci_conn *c; switch (conn->state) { @@ -5399,24 +5400,15 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) hci_dev_unlock(hdev); return 0; case BT_BOUND: - hci_dev_lock(hdev); - hci_conn_failed(conn, reason); - hci_dev_unlock(hdev); - return 0; + break; default: - hci_dev_lock(hdev); - conn->state = BT_CLOSED; - hci_disconn_cfm(conn, reason); - hci_conn_del(conn); - hci_dev_unlock(hdev); - return 0; + disconnect = true; + break; } hci_dev_lock(hdev); - /* Check if the connection hasn't been cleanup while waiting - * commands to complete. - */ + /* Check if the connection has been cleaned up concurrently */ c = hci_conn_hash_lookup_handle(hdev, handle); if (!c || c != conn) { err = 0; @@ -5428,7 +5420,13 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) * or in case of LE it was still scanning so it can be cleanup * safely. */ - hci_conn_failed(conn, reason); + if (disconnect) { + conn->state = BT_CLOSED; + hci_disconn_cfm(conn, reason); + hci_conn_del(conn); + } else { + hci_conn_failed(conn, reason); + } unlock: hci_dev_unlock(hdev); -- cgit v1.2.3 From c7f59461f5a78994613afc112cdd73688aef9076 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Wed, 4 Oct 2023 20:42:24 +0800 Subject: Bluetooth: Fix a refcnt underflow problem for hci_conn Syzbot reports a warning as follows: WARNING: CPU: 1 PID: 26946 at net/bluetooth/hci_conn.c:619 hci_conn_timeout+0x122/0x210 net/bluetooth/hci_conn.c:619 ... Call Trace: process_one_work+0x884/0x15c0 kernel/workqueue.c:2630 process_scheduled_works kernel/workqueue.c:2703 [inline] worker_thread+0x8b9/0x1290 kernel/workqueue.c:2784 kthread+0x33c/0x440 kernel/kthread.c:388 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 It is because the HCI_EV_SIMPLE_PAIR_COMPLETE event handler drops hci_conn directly without check Simple Pairing whether be enabled. But the Simple Pairing process can only be used if both sides have the support enabled in the host stack. Add hci_conn_ssp_enabled() for hci_conn in HCI_EV_IO_CAPA_REQUEST and HCI_EV_SIMPLE_PAIR_COMPLETE event handlers to fix the problem. Fixes: 0493684ed239 ("[Bluetooth] Disable disconnect timer during Simple Pairing") Signed-off-by: Ziyang Xuan Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 61b1b244595e..7e1c875e0e88 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5323,7 +5323,7 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (!conn) + if (!conn || !hci_conn_ssp_enabled(conn)) goto unlock; hci_conn_hold(conn); @@ -5570,7 +5570,7 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (!conn) + if (!conn || !hci_conn_ssp_enabled(conn)) goto unlock; /* Reset the authentication requirement to unknown */ -- cgit v1.2.3 From 6868b8505c807ad9397d78cc4e07cb1cb3582152 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 11 Oct 2023 12:35:20 -0700 Subject: xfs: adjust the incore perag block_count when shrinking If we reduce the number of blocks in an AG, we must update the incore geometry values as well. Fixes: 0800169e3e2c9 ("xfs: Pre-calculate per-AG agbno geometry") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_ag.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index e9cc481b4ddf..f9f4d694640d 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -1001,6 +1001,12 @@ xfs_ag_shrink_space( error = -ENOSPC; goto resv_init_out; } + + /* Update perag geometry */ + pag->block_count -= delta; + __xfs_agino_range(pag->pag_mount, pag->block_count, &pag->agino_min, + &pag->agino_max); + xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH); xfs_alloc_log_agf(*tpp, agfbp, XFS_AGF_LENGTH); return 0; -- cgit v1.2.3 From 442177be8c3b8edfc29e14837e59771181c590b3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 11 Oct 2023 12:35:21 -0700 Subject: xfs: process free extents to busy list in FIFO order When we're adding extents to the busy discard list, add them to the tail of the list so that we get FIFO order. For FITRIM commands, this means that we send discard bios sorted in order from longest to shortest, like we did before commit 89cfa899608fc. For transactions that are freeing extents, this puts them in the transaction's busy list in FIFO order as well, which shouldn't make any noticeable difference. Fixes: 89cfa899608fc ("xfs: reduce AGF hold times during fstrim operations") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_extent_busy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index 746814815b1d..9ecfdcdc752f 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -62,7 +62,8 @@ xfs_extent_busy_insert_list( rb_link_node(&new->rb_node, parent, rbp); rb_insert_color(&new->rb_node, &pag->pagb_tree); - list_add(&new->list, busy_list); + /* always process discard lists in fifo order */ + list_add_tail(&new->list, busy_list); spin_unlock(&pag->pagb_lock); } -- cgit v1.2.3 From 1364a3c391aedfeb32aa025303ead3d7c91cdf9d Mon Sep 17 00:00:00 2001 From: Sarthak Kukreti Date: Wed, 11 Oct 2023 13:12:30 -0700 Subject: block: Don't invalidate pagecache for invalid falloc modes Only call truncate_bdev_range() if the fallocate mode is supported. This fixes a bug where data in the pagecache could be invalidated if the fallocate() was called on the block device with an invalid mode. Fixes: 25f4c41415e5 ("block: implement (some of) fallocate for block devices") Cc: stable@vger.kernel.org Reported-by: "Darrick J. Wong" Signed-off-by: Sarthak Kukreti Reviewed-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Signed-off-by: Mike Snitzer Fixes: line? I've never seen those wrapped. Link: https://lore.kernel.org/r/20231011201230.750105-1-sarthakkukreti@chromium.org Signed-off-by: Jens Axboe --- block/fops.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/block/fops.c b/block/fops.c index acff3d5d22d4..73e42742543f 100644 --- a/block/fops.c +++ b/block/fops.c @@ -772,24 +772,35 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, filemap_invalidate_lock(inode->i_mapping); - /* Invalidate the page cache, including dirty pages. */ - error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end); - if (error) - goto fail; - + /* + * Invalidate the page cache, including dirty pages, for valid + * de-allocate mode calls to fallocate(). + */ switch (mode) { case FALLOC_FL_ZERO_RANGE: case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE: + error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end); + if (error) + goto fail; + error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT, len >> SECTOR_SHIFT, GFP_KERNEL, BLKDEV_ZERO_NOUNMAP); break; case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE: + error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end); + if (error) + goto fail; + error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT, len >> SECTOR_SHIFT, GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK); break; case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE: + error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end); + if (error) + goto fail; + error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT, len >> SECTOR_SHIFT, GFP_KERNEL); break; -- cgit v1.2.3 From 3c90c01e49342b166e5c90ec2c85b220be15a20e Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Wed, 13 Sep 2023 18:29:42 +0800 Subject: xfs: correct calculation for agend and blockcount The agend should be "start + length - 1", then, blockcount should be "end + 1 - start". Correct 2 calculation mistakes. Also, rename "agend" to "range_agend" because it's not the end of the AG per se; it's the end of the dead region within an AG's agblock space. Fixes: 5cf32f63b0f4 ("xfs: fix the calculation for "end" and "length"") Signed-off-by: Shiyang Ruan Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R --- fs/xfs/xfs_notify_failure.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c index 4a9bbd3fe120..a7daa522e00f 100644 --- a/fs/xfs/xfs_notify_failure.c +++ b/fs/xfs/xfs_notify_failure.c @@ -126,8 +126,8 @@ xfs_dax_notify_ddev_failure( struct xfs_rmap_irec ri_low = { }; struct xfs_rmap_irec ri_high; struct xfs_agf *agf; - xfs_agblock_t agend; struct xfs_perag *pag; + xfs_agblock_t range_agend; pag = xfs_perag_get(mp, agno); error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp); @@ -148,10 +148,10 @@ xfs_dax_notify_ddev_failure( ri_high.rm_startblock = XFS_FSB_TO_AGBNO(mp, end_fsbno); agf = agf_bp->b_addr; - agend = min(be32_to_cpu(agf->agf_length), + range_agend = min(be32_to_cpu(agf->agf_length) - 1, ri_high.rm_startblock); notify.startblock = ri_low.rm_startblock; - notify.blockcount = agend - ri_low.rm_startblock; + notify.blockcount = range_agend + 1 - ri_low.rm_startblock; error = xfs_rmap_query_range(cur, &ri_low, &ri_high, xfs_dax_failure_fn, ¬ify); -- cgit v1.2.3 From f93b9300301d30f275f9bd7165cbb53d5094bd8d Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 24 Aug 2023 15:47:23 +0800 Subject: xfs: Remove duplicate include ./fs/xfs/scrub/xfile.c: xfs_format.h is included more than once. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=6209 Signed-off-by: Jiapeng Chong Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R --- fs/xfs/scrub/xfile.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c index d98e8e77c684..090c3ead43fd 100644 --- a/fs/xfs/scrub/xfile.c +++ b/fs/xfs/scrub/xfile.c @@ -10,7 +10,6 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_format.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" #include "scrub/scrub.h" -- cgit v1.2.3 From cbc06310c36f73a5f3b0c6f0d974d60cf66d816b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 29 Sep 2023 14:43:18 -0400 Subject: xfs: reinstate the old i_version counter as STATX_CHANGE_COOKIE The handling of STATX_CHANGE_COOKIE was moved into generic_fillattr in commit 0d72b92883c6 (fs: pass the request_mask to generic_fillattr), but we didn't account for the fact that xfs doesn't call generic_fillattr at all. Make XFS report its i_version as the STATX_CHANGE_COOKIE. Fixes: 0d72b92883c6 (fs: pass the request_mask to generic_fillattr) Signed-off-by: Jeff Layton Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R --- fs/xfs/xfs_iops.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 1c1e6171209d..2b3b05c28e9e 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -584,6 +584,11 @@ xfs_vn_getattr( } } + if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) { + stat->change_cookie = inode_query_iversion(inode); + stat->result_mask |= STATX_CHANGE_COOKIE; + } + /* * Note: If you add another clause to set an attribute flag, please * update attributes_mask below. -- cgit v1.2.3 From ebd032fa881882fef2acb9da1bbde48d8233241d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 4 Oct 2023 13:12:58 +0200 Subject: netfilter: nf_tables: do not remove elements if set backend implements .abort pipapo set backend maintains two copies of the datastructure, removing the elements from the copy that is going to be discarded slows down the abort path significantly, from several minutes to few seconds after this patch. Fixes: 212ed75dc5fb ("netfilter: nf_tables: integrate pipapo into commit protocol") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a72b6aeefb1b..c3de3791cabd 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10347,7 +10347,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; } te = (struct nft_trans_elem *)trans->data; - nft_setelem_remove(net, te->set, &te->elem); + if (!te->set->ops->abort || + nft_setelem_is_catchall(te->set, &te->elem)) + nft_setelem_remove(net, te->set, &te->elem); + if (!nft_setelem_is_catchall(te->set, &te->elem)) atomic_dec(&te->set->nelems); -- cgit v1.2.3 From 2e1d175410972285333193837a4250a74cd472e6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 5 Oct 2023 10:53:08 +0200 Subject: netfilter: nfnetlink_log: silence bogus compiler warning net/netfilter/nfnetlink_log.c:800:18: warning: variable 'ctinfo' is uninitialized The warning is bogus, the variable is only used if ct is non-NULL and always initialised in that case. Init to 0 too to silence this. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309100514.ndBFebXN-lkp@intel.com/ Signed-off-by: Florian Westphal --- net/netfilter/nfnetlink_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 53c9e76473ba..f03f4d4d7d88 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -698,8 +698,8 @@ nfulnl_log_packet(struct net *net, unsigned int plen = 0; struct nfnl_log_net *log = nfnl_log_pernet(net); const struct nfnl_ct_hook *nfnl_ct = NULL; + enum ip_conntrack_info ctinfo = 0; struct nf_conn *ct = NULL; - enum ip_conntrack_info ctinfo; if (li_user && li_user->type == NF_LOG_TYPE_ULOG) li = li_user; -- cgit v1.2.3 From d51c42cdef5f961f63e39e172e4dfdcac54acd5e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 3 Oct 2023 16:17:51 -0700 Subject: netfilter: nf_tables: Annotate struct nft_pipapo_match with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct nft_pipapo_match. Cc: Pablo Neira Ayuso Cc: Jozsef Kadlecsik Cc: Florian Westphal Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: netfilter-devel@vger.kernel.org Cc: coreteam@netfilter.org Cc: netdev@vger.kernel.org Link: https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci [1] Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 25a75591583e..2e164a319945 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -147,7 +147,7 @@ struct nft_pipapo_match { unsigned long * __percpu *scratch; size_t bsize_max; struct rcu_head rcu; - struct nft_pipapo_field f[]; + struct nft_pipapo_field f[] __counted_by(field_count); }; /** -- cgit v1.2.3 From 4c90bba60c26db7dc7df450f748e86440149786e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 2 Oct 2023 11:57:42 +0200 Subject: netfilter: nf_tables: do not refresh timeout when resetting element The dump and reset command should not refresh the timeout, this command is intended to allow users to list existing stateful objects and reset them, element expiration should be refresh via transaction instead with a specific command to achieve this, otherwise this is entering combo semantics that will be hard to be undone later (eg. a user asking to retrieve counters but _not_ requiring to refresh expiration). Fixes: 079cd633219d ("netfilter: nf_tables: Introduce NFT_MSG_GETSETELEM_RESET") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c3de3791cabd..aae6ffebb413 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5556,7 +5556,6 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - u64 timeout = 0; nest = nla_nest_start_noflag(skb, NFTA_LIST_ELEM); if (nest == NULL) @@ -5592,15 +5591,11 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, htonl(*nft_set_ext_flags(ext)))) goto nla_put_failure; - if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) { - timeout = *nft_set_ext_timeout(ext); - if (nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, - nf_jiffies64_to_msecs(timeout), - NFTA_SET_ELEM_PAD)) - goto nla_put_failure; - } else if (set->flags & NFT_SET_TIMEOUT) { - timeout = READ_ONCE(set->timeout); - } + if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && + nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, + nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)), + NFTA_SET_ELEM_PAD)) + goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { u64 expires, now = get_jiffies_64(); @@ -5615,9 +5610,6 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, nf_jiffies64_to_msecs(expires), NFTA_SET_ELEM_PAD)) goto nla_put_failure; - - if (reset) - *nft_set_ext_expiration(ext) = now + timeout; } if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) { -- cgit v1.2.3 From 52177bbf19e6e9398375a148d2e13ed492b40b80 Mon Sep 17 00:00:00 2001 From: Xingyuan Mo Date: Mon, 9 Oct 2023 18:36:14 +0800 Subject: nf_tables: fix NULL pointer dereference in nft_inner_init() We should check whether the NFTA_INNER_NUM netlink attribute is present before accessing it, otherwise a null pointer deference error will occur. Call Trace: dump_stack_lvl+0x4f/0x90 print_report+0x3f0/0x620 kasan_report+0xcd/0x110 __asan_load4+0x84/0xa0 nft_inner_init+0x128/0x2e0 nf_tables_newrule+0x813/0x1230 nfnetlink_rcv_batch+0xec3/0x1170 nfnetlink_rcv+0x1e4/0x220 netlink_unicast+0x34e/0x4b0 netlink_sendmsg+0x45c/0x7e0 __sys_sendto+0x355/0x370 __x64_sys_sendto+0x84/0xa0 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Fixes: 3a07327d10a0 ("netfilter: nft_inner: support for inner tunnel header matching") Signed-off-by: Xingyuan Mo Signed-off-by: Florian Westphal --- net/netfilter/nft_inner.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c index 28e2873ba24e..928312d01eb1 100644 --- a/net/netfilter/nft_inner.c +++ b/net/netfilter/nft_inner.c @@ -298,6 +298,7 @@ static int nft_inner_init(const struct nft_ctx *ctx, int err; if (!tb[NFTA_INNER_FLAGS] || + !tb[NFTA_INNER_NUM] || !tb[NFTA_INNER_HDRSIZE] || !tb[NFTA_INNER_TYPE] || !tb[NFTA_INNER_EXPR]) -- cgit v1.2.3 From 505ce0630ad5d31185695f8a29dde8d29f28faa7 Mon Sep 17 00:00:00 2001 From: Xingyuan Mo Date: Mon, 9 Oct 2023 18:36:15 +0800 Subject: nf_tables: fix NULL pointer dereference in nft_expr_inner_parse() We should check whether the NFTA_EXPR_NAME netlink attribute is present before accessing it, otherwise a null pointer deference error will occur. Call Trace: dump_stack_lvl+0x4f/0x90 print_report+0x3f0/0x620 kasan_report+0xcd/0x110 __asan_load2+0x7d/0xa0 nla_strcmp+0x2f/0x90 __nft_expr_type_get+0x41/0xb0 nft_expr_inner_parse+0xe3/0x200 nft_inner_init+0x1be/0x2e0 nf_tables_newrule+0x813/0x1230 nfnetlink_rcv_batch+0xec3/0x1170 nfnetlink_rcv+0x1e4/0x220 netlink_unicast+0x34e/0x4b0 netlink_sendmsg+0x45c/0x7e0 __sys_sendto+0x355/0x370 __x64_sys_sendto+0x84/0xa0 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Fixes: 3a07327d10a0 ("netfilter: nft_inner: support for inner tunnel header matching") Signed-off-by: Xingyuan Mo Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index aae6ffebb413..a623d31b6518 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3166,7 +3166,7 @@ int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla, if (err < 0) return err; - if (!tb[NFTA_EXPR_DATA]) + if (!tb[NFTA_EXPR_DATA] || !tb[NFTA_EXPR_NAME]) return -EINVAL; type = __nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]); -- cgit v1.2.3 From d351c1ea2de3e36e608fc355d8ae7d0cc80e6cd6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 8 Oct 2023 19:36:53 +0200 Subject: netfilter: nft_payload: fix wrong mac header matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mcast packets get looped back to the local machine. Such packets have a 0-length mac header, we should treat this like "mac header not set" and abort rule evaluation. As-is, we just copy data from the network header instead. Fixes: 96518518cc41 ("netfilter: add nftables") Reported-by: Blažej Krajňák Signed-off-by: Florian Westphal --- net/netfilter/nft_payload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 120f6d395b98..0a689c8e0295 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -179,7 +179,7 @@ void nft_payload_eval(const struct nft_expr *expr, switch (priv->base) { case NFT_PAYLOAD_LL_HEADER: - if (!skb_mac_header_was_set(skb)) + if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) == 0) goto err; if (skb_vlan_tag_present(skb) && -- cgit v1.2.3 From b7fd68ab1538e3adb665670414bea440f399fda9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 5 Oct 2023 14:56:47 +0100 Subject: drm: Do not overrun array in drm_gem_get_pages() If the shared memory object is larger than the DRM object that it backs, we can overrun the page array. Limit the number of pages we install from each folio to prevent this. Signed-off-by: "Matthew Wilcox (Oracle)" Reported-by: Oleksandr Natalenko Tested-by: Oleksandr Natalenko Link: https://lore.kernel.org/lkml/13360591.uLZWGnKmhe@natalenko.name/ Fixes: 3291e09a4638 ("drm: convert drm_gem_put_pages() to use a folio_batch") Cc: stable@vger.kernel.org # 6.5.x Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20231005135648.2317298-1-willy@infradead.org --- drivers/gpu/drm/drm_gem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 6129b89bb366..44a948b80ee1 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -540,7 +540,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj) struct page **pages; struct folio *folio; struct folio_batch fbatch; - int i, j, npages; + long i, j, npages; if (WARN_ON(!obj->filp)) return ERR_PTR(-EINVAL); @@ -564,11 +564,13 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj) i = 0; while (i < npages) { + long nr; folio = shmem_read_folio_gfp(mapping, i, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) goto fail; - for (j = 0; j < folio_nr_pages(folio); j++, i++) + nr = min(npages - i, folio_nr_pages(folio)); + for (j = 0; j < nr; j++, i++) pages[i] = folio_file_page(folio, i); /* Make sure shmem keeps __GFP_DMA32 allocated pages in the -- cgit v1.2.3 From c1165df2be2fffe3adeeaa68f4ee4325108c5e4e Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 10 Oct 2023 18:46:52 +0100 Subject: drm/tiny: correctly print `struct resource *` on error The `res` variable is already a `struct resource *`, don't take the address of it. Fixes incorrect output: simple-framebuffer 9e20dc000.framebuffer: [drm] *ERROR* could not acquire memory range [??? 0xffff4be88a387d00-0xfffffefffde0a240 flags 0x0]: -16 To be correct: simple-framebuffer 9e20dc000.framebuffer: [drm] *ERROR* could not acquire memory range [mem 0x9e20dc000-0x9e307bfff flags 0x200]: -16 Signed-off-by: Joey Gouly Fixes: 9a10c7e6519b ("drm/simpledrm: Add support for system memory framebuffers") Cc: Thomas Zimmermann Cc: Thierry Reding Cc: Javier Martinez Canillas Cc: dri-devel@lists.freedesktop.org Cc: # v6.3+ Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20231010174652.2439513-1-joey.gouly@arm.com --- drivers/gpu/drm/tiny/simpledrm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index ff86ba1ae1b8..8ea120eb8674 100644 --- a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -745,7 +745,7 @@ static struct simpledrm_device *simpledrm_device_create(struct drm_driver *drv, ret = devm_aperture_acquire_from_firmware(dev, res->start, resource_size(res)); if (ret) { - drm_err(dev, "could not acquire memory range %pr: %d\n", &res, ret); + drm_err(dev, "could not acquire memory range %pr: %d\n", res, ret); return ERR_PTR(ret); } -- cgit v1.2.3 From 14a270bfab7ab1c4b605c01eeca5557447ad5a2b Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 22 Aug 2023 16:49:03 +0000 Subject: riscv: signal: fix sigaltstack frame size checking The alternative stack checking in get_sigframe introduced by the Vector support is not needed and has a problem. It is not needed as we have already validate it at the beginning of the function if we are already on an altstack. If not, the size of an altstack is always validated at its allocation stage with sigaltstack_size_valid(). Besides, we must only regard the size of an altstack if the handler of a signal is registered with SA_ONSTACK. So, blindly checking overflow of an altstack if sas_ss_size not equals to zero will check against wrong signal handlers if only a subset of signals are registered with SA_ONSTACK. Fixes: 8ee0b41898fa ("riscv: signal: Add sigcontext save/restore for vector") Reported-by: Prashanth Swaminathan Signed-off-by: Andy Chiu Link: https://lore.kernel.org/r/20230822164904.21660-1-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/signal.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 180d951d3624..21a4d0e111bc 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -311,13 +311,6 @@ static inline void __user *get_sigframe(struct ksignal *ksig, /* Align the stack frame. */ sp &= ~0xfUL; - /* - * Fail if the size of the altstack is not large enough for the - * sigframe construction. - */ - if (current->sas_ss_size && sp < current->sas_ss_sp) - return (void __user __force *)-1UL; - return (void __user *)sp; } -- cgit v1.2.3 From e95f3f74465072c2545d8e65a3c3a96e37129cf8 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 5 Oct 2023 16:04:25 -0300 Subject: smb: client: make laundromat a delayed worker By having laundromat kthread processing cached directories on every second turned out to be overkill, especially when having multiple SMB mounts. Relax it by using a delayed worker instead that gets scheduled on every @dir_cache_timeout (default=30) seconds per tcon. This also fixes the 1s delay when tearing down tcon. Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 89 ++++++++++++++++++---------------------------- fs/smb/client/cached_dir.h | 2 +- 2 files changed, 36 insertions(+), 55 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index e2be8aedb26e..a9e5d3b7e9a0 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -15,6 +15,7 @@ static struct cached_fid *init_cached_dir(const char *path); static void free_cached_dir(struct cached_fid *cfid); static void smb2_close_cached_fid(struct kref *ref); +static void cfids_laundromat_worker(struct work_struct *work); static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, const char *path, @@ -572,53 +573,46 @@ static void free_cached_dir(struct cached_fid *cfid) kfree(cfid); } -static int -cifs_cfids_laundromat_thread(void *p) +static void cfids_laundromat_worker(struct work_struct *work) { - struct cached_fids *cfids = p; + struct cached_fids *cfids; struct cached_fid *cfid, *q; - struct list_head entry; + LIST_HEAD(entry); - while (!kthread_should_stop()) { - ssleep(1); - INIT_LIST_HEAD(&entry); - if (kthread_should_stop()) - return 0; - spin_lock(&cfids->cfid_list_lock); - list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { - if (time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) { - list_del(&cfid->entry); - list_add(&cfid->entry, &entry); - cfids->num_entries--; - } + cfids = container_of(work, struct cached_fids, laundromat_work.work); + + spin_lock(&cfids->cfid_list_lock); + list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { + if (time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) { + list_move(&cfid->entry, &entry); + cfids->num_entries--; } - spin_unlock(&cfids->cfid_list_lock); + } + spin_unlock(&cfids->cfid_list_lock); - list_for_each_entry_safe(cfid, q, &entry, entry) { - cfid->on_list = false; - list_del(&cfid->entry); + list_for_each_entry_safe(cfid, q, &entry, entry) { + cfid->on_list = false; + list_del(&cfid->entry); + /* + * Cancel and wait for the work to finish in case we are racing + * with it. + */ + cancel_work_sync(&cfid->lease_break); + if (cfid->has_lease) { /* - * Cancel, and wait for the work to finish in - * case we are racing with it. + * Our lease has not yet been cancelled from the server + * so we need to drop the reference. */ - cancel_work_sync(&cfid->lease_break); - if (cfid->has_lease) { - /* - * We lease has not yet been cancelled from - * the server so we need to drop the reference. - */ - spin_lock(&cfids->cfid_list_lock); - cfid->has_lease = false; - spin_unlock(&cfids->cfid_list_lock); - kref_put(&cfid->refcount, smb2_close_cached_fid); - } + spin_lock(&cfids->cfid_list_lock); + cfid->has_lease = false; + spin_unlock(&cfids->cfid_list_lock); + kref_put(&cfid->refcount, smb2_close_cached_fid); } } - - return 0; + queue_delayed_work(cifsiod_wq, &cfids->laundromat_work, + dir_cache_timeout * HZ); } - struct cached_fids *init_cached_dirs(void) { struct cached_fids *cfids; @@ -629,19 +623,10 @@ struct cached_fids *init_cached_dirs(void) spin_lock_init(&cfids->cfid_list_lock); INIT_LIST_HEAD(&cfids->entries); - /* - * since we're in a cifs function already, we know that - * this will succeed. No need for try_module_get(). - */ - __module_get(THIS_MODULE); - cfids->laundromat = kthread_run(cifs_cfids_laundromat_thread, - cfids, "cifsd-cfid-laundromat"); - if (IS_ERR(cfids->laundromat)) { - cifs_dbg(VFS, "Failed to start cfids laundromat thread.\n"); - kfree(cfids); - module_put(THIS_MODULE); - return NULL; - } + INIT_DELAYED_WORK(&cfids->laundromat_work, cfids_laundromat_worker); + queue_delayed_work(cifsiod_wq, &cfids->laundromat_work, + dir_cache_timeout * HZ); + return cfids; } @@ -657,11 +642,7 @@ void free_cached_dirs(struct cached_fids *cfids) if (cfids == NULL) return; - if (cfids->laundromat) { - kthread_stop(cfids->laundromat); - cfids->laundromat = NULL; - module_put(THIS_MODULE); - } + cancel_delayed_work_sync(&cfids->laundromat_work); spin_lock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h index a82ff2cea789..81ba0fd5cc16 100644 --- a/fs/smb/client/cached_dir.h +++ b/fs/smb/client/cached_dir.h @@ -57,7 +57,7 @@ struct cached_fids { spinlock_t cfid_list_lock; int num_entries; struct list_head entries; - struct task_struct *laundromat; + struct delayed_work laundromat_work; }; extern struct cached_fids *init_cached_dirs(void); -- cgit v1.2.3 From 81ba10959970d15c388bf29866b01b62f387e6a3 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 9 Oct 2023 17:37:40 -0300 Subject: smb: client: prevent new fids from being removed by laundromat Check if @cfid->time is set in laundromat so we guarantee that only fully cached fids will be selected for removal. While we're at it, add missing locks to protect access of @cfid fields in order to avoid races with open_cached_dir() and cfids_laundromat_worker(), respectively. Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 56 +++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index a9e5d3b7e9a0..fe1bf5b6e0cb 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -170,15 +170,18 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, return -ENOENT; } /* - * At this point we either have a lease already and we can just - * return it. If not we are guaranteed to be the only thread accessing - * this cfid. + * Return cached fid if it has a lease. Otherwise, it is either a new + * entry or laundromat worker removed it from @cfids->entries. Caller + * will put last reference if the latter. */ + spin_lock(&cfids->cfid_list_lock); if (cfid->has_lease) { + spin_unlock(&cfids->cfid_list_lock); *ret_cfid = cfid; kfree(utf16_path); return 0; } + spin_unlock(&cfids->cfid_list_lock); /* * Skip any prefix paths in @path as lookup_positive_unlocked() ends up @@ -295,9 +298,11 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, goto oshr_free; } } + spin_lock(&cfids->cfid_list_lock); cfid->dentry = dentry; cfid->time = jiffies; cfid->has_lease = true; + spin_unlock(&cfids->cfid_list_lock); oshr_free: kfree(utf16_path); @@ -306,24 +311,28 @@ oshr_free: free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base); free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base); spin_lock(&cfids->cfid_list_lock); - if (rc && !cfid->has_lease) { - if (cfid->on_list) { - list_del(&cfid->entry); - cfid->on_list = false; - cfids->num_entries--; + if (!cfid->has_lease) { + if (rc) { + if (cfid->on_list) { + list_del(&cfid->entry); + cfid->on_list = false; + cfids->num_entries--; + } + rc = -ENOENT; + } else { + /* + * We are guaranteed to have two references at this + * point. One for the caller and one for a potential + * lease. Release the Lease-ref so that the directory + * will be closed when the caller closes the cached + * handle. + */ + spin_unlock(&cfids->cfid_list_lock); + kref_put(&cfid->refcount, smb2_close_cached_fid); + goto out; } - rc = -ENOENT; } spin_unlock(&cfids->cfid_list_lock); - if (!rc && !cfid->has_lease) { - /* - * We are guaranteed to have two references at this point. - * One for the caller and one for a potential lease. - * Release the Lease-ref so that the directory will be closed - * when the caller closes the cached handle. - */ - kref_put(&cfid->refcount, smb2_close_cached_fid); - } if (rc) { if (cfid->is_open) SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, @@ -331,7 +340,7 @@ oshr_free: free_cached_dir(cfid); cfid = NULL; } - +out: if (rc == 0) { *ret_cfid = cfid; atomic_inc(&tcon->num_remote_opens); @@ -583,15 +592,18 @@ static void cfids_laundromat_worker(struct work_struct *work) spin_lock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { - if (time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) { + if (cfid->time && + time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) { + cfid->on_list = false; list_move(&cfid->entry, &entry); cfids->num_entries--; + /* To prevent race with smb2_cached_lease_break() */ + kref_get(&cfid->refcount); } } spin_unlock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &entry, entry) { - cfid->on_list = false; list_del(&cfid->entry); /* * Cancel and wait for the work to finish in case we are racing @@ -608,6 +620,8 @@ static void cfids_laundromat_worker(struct work_struct *work) spin_unlock(&cfids->cfid_list_lock); kref_put(&cfid->refcount, smb2_close_cached_fid); } + /* Drop the extra reference opened above */ + kref_put(&cfid->refcount, smb2_close_cached_fid); } queue_delayed_work(cifsiod_wq, &cfids->laundromat_work, dir_cache_timeout * HZ); -- cgit v1.2.3 From 18164f66e6c59fda15c198b371fa008431efdb22 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Sep 2023 17:19:52 -0700 Subject: x86/fpu: Allow caller to constrain xfeatures when copying to uabi buffer Plumb an xfeatures mask into __copy_xstate_to_uabi_buf() so that KVM can constrain which xfeatures are saved into the userspace buffer without having to modify the user_xfeatures field in KVM's guest_fpu state. KVM's ABI for KVM_GET_XSAVE{2} is that features that are not exposed to guest must not show up in the effective xstate_bv field of the buffer. Saving only the guest-supported xfeatures allows userspace to load the saved state on a different host with a fewer xfeatures, so long as the target host supports the xfeatures that are exposed to the guest. KVM currently sets user_xfeatures directly to restrict KVM_GET_XSAVE{2} to the set of guest-supported xfeatures, but doing so broke KVM's historical ABI for KVM_SET_XSAVE, which allows userspace to load any xfeatures that are supported by the *host*. Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20230928001956.924301-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/fpu/api.h | 3 ++- arch/x86/kernel/fpu/core.c | 5 +++-- arch/x86/kernel/fpu/xstate.c | 7 +++++-- arch/x86/kernel/fpu/xstate.h | 3 ++- arch/x86/kvm/x86.c | 21 +++++++++------------ 5 files changed, 21 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 31089b851c4f..a2be3aefff9f 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -157,7 +157,8 @@ static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { static inline void fpu_sync_guest_vmexit_xfd_state(void) { } #endif -extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru); +extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, + unsigned int size, u64 xfeatures, u32 pkru); extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru); static inline void fpstate_set_confidential(struct fpu_guest *gfpu) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index a86d37052a64..a21a4d0ecc34 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate); void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, - unsigned int size, u32 pkru) + unsigned int size, u64 xfeatures, u32 pkru) { struct fpstate *kstate = gfpu->fpstate; union fpregs_state *ustate = buf; struct membuf mb = { .p = buf, .left = size }; if (cpu_feature_enabled(X86_FEATURE_XSAVE)) { - __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE); + __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru, + XSTATE_COPY_XSAVE); } else { memcpy(&ustate->fxsave, &kstate->regs.fxsave, sizeof(ustate->fxsave)); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index cadf68737e6b..76408313ed7f 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1049,6 +1049,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer * @to: membuf descriptor * @fpstate: The fpstate buffer from which to copy + * @xfeatures: The mask of xfeatures to save (XSAVE mode only) * @pkru_val: The PKRU value to store in the PKRU component * @copy_mode: The requested copy mode * @@ -1059,7 +1060,8 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, * It supports partial copy but @to.pos always starts from zero. */ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, - u32 pkru_val, enum xstate_copy_mode copy_mode) + u64 xfeatures, u32 pkru_val, + enum xstate_copy_mode copy_mode) { const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr); struct xregs_state *xinit = &init_fpstate.regs.xsave; @@ -1083,7 +1085,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, break; case XSTATE_COPY_XSAVE: - header.xfeatures &= fpstate->user_xfeatures; + header.xfeatures &= fpstate->user_xfeatures & xfeatures; break; } @@ -1185,6 +1187,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode copy_mode) { __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate, + tsk->thread.fpu.fpstate->user_xfeatures, tsk->thread.pkru, copy_mode); } diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index a4ecb04d8d64..3518fb26d06b 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -43,7 +43,8 @@ enum xstate_copy_mode { struct membuf; extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, - u32 pkru_val, enum xstate_copy_mode copy_mode); + u64 xfeatures, u32 pkru_val, + enum xstate_copy_mode copy_mode); extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode mode); extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9f18b06bbda6..41d8e6c8570c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5382,26 +5382,23 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, return 0; } -static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, - struct kvm_xsave *guest_xsave) + +static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, + u8 *state, unsigned int size) { if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) return; - fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, - guest_xsave->region, - sizeof(guest_xsave->region), + fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size, + vcpu->arch.guest_fpu.fpstate->user_xfeatures, vcpu->arch.pkru); } -static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, - u8 *state, unsigned int size) +static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, + struct kvm_xsave *guest_xsave) { - if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) - return; - - fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, - state, size, vcpu->arch.pkru); + return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, + sizeof(guest_xsave->region)); } static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From 8647c52e9504c99752a39f1d44f6268f82c40a5c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Sep 2023 17:19:53 -0700 Subject: KVM: x86: Constrain guest-supported xfeatures only at KVM_GET_XSAVE{2} Mask off xfeatures that aren't exposed to the guest only when saving guest state via KVM_GET_XSAVE{2} instead of modifying user_xfeatures directly. Preserving the maximal set of xfeatures in user_xfeatures restores KVM's ABI for KVM_SET_XSAVE, which prior to commit ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0") allowed userspace to load xfeatures that are supported by the host, irrespective of what xfeatures are exposed to the guest. There is no known use case where userspace *intentionally* loads xfeatures that aren't exposed to the guest, but the bug fixed by commit ad856280ddea was specifically that KVM_GET_SAVE{2} would save xfeatures that weren't exposed to the guest, e.g. would lead to userspace unintentionally loading guest-unsupported xfeatures when live migrating a VM. Restricting KVM_SET_XSAVE to guest-supported xfeatures is especially problematic for QEMU-based setups, as QEMU has a bug where instead of terminating the VM if KVM_SET_XSAVE fails, QEMU instead simply stops loading guest state, i.e. resumes the guest after live migration with incomplete guest state, and ultimately results in guest data corruption. Note, letting userspace restore all host-supported xfeatures does not fix setups where a VM is migrated from a host *without* commit ad856280ddea, to a target with a subset of host-supported xfeatures. However there is no way to safely address that scenario, e.g. KVM could silently drop the unsupported features, but that would be a clear violation of KVM's ABI and so would require userspace to opt-in, at which point userspace could simply be updated to sanitize the to-be-loaded XSAVE state. Reported-by: Tyler Stachecki Closes: https://lore.kernel.org/all/20230914010003.358162-1-tstachecki@bloomberg.net Fixes: ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0") Cc: stable@vger.kernel.org Cc: Leonardo Bras Signed-off-by: Sean Christopherson Acked-by: Dave Hansen Message-Id: <20230928001956.924301-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/fpu/xstate.c | 5 +---- arch/x86/kvm/cpuid.c | 8 -------- arch/x86/kvm/x86.c | 18 ++++++++++++++++-- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 76408313ed7f..ef6906107c54 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1539,10 +1539,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize, fpregs_restore_userregs(); newfps->xfeatures = curfps->xfeatures | xfeatures; - - if (!guest_fpu) - newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; - + newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; newfps->xfd = curfps->xfd & ~xfeatures; /* Do the final updates within the locked region */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0544e30b4946..773132c3bf5a 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -360,14 +360,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vcpu->arch.guest_supported_xcr0 = cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); - /* - * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if - * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't - * supported by the host. - */ - vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 | - XFEATURE_MASK_FPSSE; - kvm_update_pv_runtime(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 41d8e6c8570c..1e645f5b1e2c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5386,12 +5386,26 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, u8 *state, unsigned int size) { + /* + * Only copy state for features that are enabled for the guest. The + * state itself isn't problematic, but setting bits in the header for + * features that are supported in *this* host but not exposed to the + * guest can result in KVM_SET_XSAVE failing when live migrating to a + * compatible host without the features that are NOT exposed to the + * guest. + * + * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if + * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't + * supported by the host. + */ + u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 | + XFEATURE_MASK_FPSSE; + if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) return; fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size, - vcpu->arch.guest_fpu.fpstate->user_xfeatures, - vcpu->arch.pkru); + supported_xcr0, vcpu->arch.pkru); } static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From 60d351f18f7a8acd08964ef1d5c948354a7907be Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Sep 2023 17:19:54 -0700 Subject: KVM: selftests: Touch relevant XSAVE state in guest for state test Modify support XSAVE state in the "state test's" guest code so that saving and loading state via KVM_{G,S}ET_XSAVE actually does something useful, i.e. so that xstate_bv in XSAVE state isn't empty. Punt on BNDCSR for now, it's easier to just stuff that xfeature from the host side. Signed-off-by: Sean Christopherson Message-Id: <20230928001956.924301-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/processor.h | 14 ++++ tools/testing/selftests/kvm/x86_64/state_test.c | 77 ++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 4fd042112526..6f66861175ad 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -68,6 +68,12 @@ struct xstate { #define XFEATURE_MASK_OPMASK BIT_ULL(5) #define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6) #define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7) +#define XFEATURE_MASK_PT BIT_ULL(8) +#define XFEATURE_MASK_PKRU BIT_ULL(9) +#define XFEATURE_MASK_PASID BIT_ULL(10) +#define XFEATURE_MASK_CET_USER BIT_ULL(11) +#define XFEATURE_MASK_CET_KERNEL BIT_ULL(12) +#define XFEATURE_MASK_LBR BIT_ULL(15) #define XFEATURE_MASK_XTILE_CFG BIT_ULL(17) #define XFEATURE_MASK_XTILE_DATA BIT_ULL(18) @@ -147,6 +153,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24) #define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2) #define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3) +#define X86_FEATURE_OSPKE KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4) #define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16) #define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22) #define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30) @@ -553,6 +560,13 @@ static inline void xsetbv(u32 index, u64 value) __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index)); } +static inline void wrpkru(u32 pkru) +{ + /* Note, ECX and EDX are architecturally required to be '0'. */ + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (pkru), "c"(0), "d"(0)); +} + static inline struct desc_ptr get_gdt(void) { struct desc_ptr gdt; diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 4c4925a8ab45..df3e93df4343 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -139,6 +139,83 @@ static void vmx_l1_guest_code(struct vmx_pages *vmx_pages) static void __attribute__((__flatten__)) guest_code(void *arg) { GUEST_SYNC(1); + + if (this_cpu_has(X86_FEATURE_XSAVE)) { + uint64_t supported_xcr0 = this_cpu_supported_xcr0(); + uint8_t buffer[4096]; + + memset(buffer, 0xcc, sizeof(buffer)); + + set_cr4(get_cr4() | X86_CR4_OSXSAVE); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); + + xsetbv(0, xgetbv(0) | supported_xcr0); + + /* + * Modify state for all supported xfeatures to take them out of + * their "init" state, i.e. to make them show up in XSTATE_BV. + * + * Note off-by-default features, e.g. AMX, are out of scope for + * this particular testcase as they have a different ABI. + */ + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP); + asm volatile ("fincstp"); + + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE); + asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer)); + + if (supported_xcr0 & XFEATURE_MASK_YMM) + asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer)); + + if (supported_xcr0 & XFEATURE_MASK_AVX512) { + asm volatile ("kmovq %0, %%k1" :: "r" (-1ull)); + asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer)); + asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer)); + } + + if (this_cpu_has(X86_FEATURE_MPX)) { + uint64_t bounds[2] = { 10, 0xffffffffull }; + uint64_t output[2] = { }; + + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS); + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR); + + /* + * Don't bother trying to get BNDCSR into the INUSE + * state. MSR_IA32_BNDCFGS doesn't count as it isn't + * managed via XSAVE/XRSTOR, and BNDCFGU can only be + * modified by XRSTOR. Stuffing XSTATE_BV in the host + * is simpler than doing XRSTOR here in the guest. + * + * However, temporarily enable MPX in BNDCFGS so that + * BNDMOV actually loads BND1. If MPX isn't *fully* + * enabled, all MPX instructions are treated as NOPs. + * + * Hand encode "bndmov (%rax),%bnd1" as support for MPX + * mnemonics/registers has been removed from gcc and + * clang (and was never fully supported by clang). + */ + wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0)); + asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds)); + /* + * Hand encode "bndmov %bnd1, (%rax)" to sanity check + * that BND1 actually got loaded. + */ + asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output)); + wrmsr(MSR_IA32_BNDCFGS, 0); + + GUEST_ASSERT_EQ(bounds[0], output[0]); + GUEST_ASSERT_EQ(bounds[1], output[1]); + } + if (this_cpu_has(X86_FEATURE_PKU)) { + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU); + set_cr4(get_cr4() | X86_CR4_PKE); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE)); + + wrpkru(-1u); + } + } + GUEST_SYNC(2); if (arg) { -- cgit v1.2.3 From 77709820787355f45755fe454e26fca8584ecf40 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Sep 2023 17:19:55 -0700 Subject: KVM: selftests: Load XSAVE state into untouched vCPU during state test Expand x86's state test to load XSAVE state into a "dummy" vCPU prior to KVM_SET_CPUID2, and again with an empty guest CPUID model. Except for off-by-default features, i.e. AMX, KVM's ABI for KVM_SET_XSAVE is that userspace is allowed to load xfeatures so long as they are supported by the host. This is a regression test for a combination of KVM bugs where the state saved by KVM_GET_XSAVE{2} could not be loaded via KVM_SET_XSAVE if the saved xstate_bv would load guest-unsupported xfeatures. Signed-off-by: Sean Christopherson Message-Id: <20230928001956.924301-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/state_test.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index df3e93df4343..115b2cdf9279 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -231,9 +231,9 @@ static void __attribute__((__flatten__)) guest_code(void *arg) int main(int argc, char *argv[]) { vm_vaddr_t nested_gva = 0; - + struct kvm_cpuid2 empty_cpuid = {}; struct kvm_regs regs1, regs2; - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu, *vcpuN; struct kvm_vm *vm; struct kvm_x86_state *state; struct ucall uc; @@ -286,6 +286,21 @@ int main(int argc, char *argv[]) /* Restore state in a new VM. */ vcpu = vm_recreate_with_one_vcpu(vm); vcpu_load_state(vcpu, state); + + /* + * Restore XSAVE state in a dummy vCPU, first without doing + * KVM_SET_CPUID2, and then with an empty guest CPUID. Except + * for off-by-default xfeatures, e.g. AMX, KVM is supposed to + * allow KVM_SET_XSAVE regardless of guest CPUID. Manually + * load only XSAVE state, MSRs in particular have a much more + * convoluted ABI. + */ + vcpuN = __vm_vcpu_add(vm, vcpu->id + 1); + vcpu_xsave_set(vcpuN, state->xsave); + + vcpu_init_cpuid(vcpuN, &empty_cpuid); + vcpu_xsave_set(vcpuN, state->xsave); + kvm_x86_state_cleanup(state); memset(®s2, 0, sizeof(regs2)); -- cgit v1.2.3 From 87e3ca055cdc6c63fb82b9bec7c370405d03f6ef Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Sep 2023 17:19:56 -0700 Subject: KVM: selftests: Force load all supported XSAVE state in state test Extend x86's state to forcefully load *all* host-supported xfeatures by modifying xstate_bv in the saved state. Stuffing xstate_bv ensures that the selftest is verifying KVM's full ABI regardless of whether or not the guest code is successful in getting various xfeatures out of their INIT state, e.g. see the disaster that is/was MPX. Signed-off-by: Sean Christopherson Message-Id: <20230928001956.924301-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/x86_64/processor.h | 9 +++++++++ tools/testing/selftests/kvm/x86_64/state_test.c | 14 ++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 6f66861175ad..25bc61dac5fb 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -922,6 +922,15 @@ static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature) !kvm_cpu_has(feature.anti_feature); } +static __always_inline uint64_t kvm_cpu_supported_xcr0(void) +{ + if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO)) + return 0; + + return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) | + ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); +} + static inline size_t kvm_cpuid2_size(int nr_entries) { return sizeof(struct kvm_cpuid2) + diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 115b2cdf9279..88b58aab7207 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -230,6 +230,7 @@ static void __attribute__((__flatten__)) guest_code(void *arg) int main(int argc, char *argv[]) { + uint64_t *xstate_bv, saved_xstate_bv; vm_vaddr_t nested_gva = 0; struct kvm_cpuid2 empty_cpuid = {}; struct kvm_regs regs1, regs2; @@ -294,12 +295,25 @@ int main(int argc, char *argv[]) * allow KVM_SET_XSAVE regardless of guest CPUID. Manually * load only XSAVE state, MSRs in particular have a much more * convoluted ABI. + * + * Load two versions of XSAVE state: one with the actual guest + * XSAVE state, and one with all supported features forced "on" + * in xstate_bv, e.g. to ensure that KVM allows loading all + * supported features, even if something goes awry in saving + * the original snapshot. */ + xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512]; + saved_xstate_bv = *xstate_bv; + vcpuN = __vm_vcpu_add(vm, vcpu->id + 1); vcpu_xsave_set(vcpuN, state->xsave); + *xstate_bv = kvm_cpu_supported_xcr0(); + vcpu_xsave_set(vcpuN, state->xsave); vcpu_init_cpuid(vcpuN, &empty_cpuid); vcpu_xsave_set(vcpuN, state->xsave); + *xstate_bv = saved_xstate_bv; + vcpu_xsave_set(vcpuN, state->xsave); kvm_x86_state_cleanup(state); -- cgit v1.2.3 From b65235f6e102354ccafda601eaa1c5bef5284d21 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 28 Sep 2023 20:33:51 +0300 Subject: x86: KVM: SVM: always update the x2avic msr interception The following problem exists since x2avic was enabled in the KVM: svm_set_x2apic_msr_interception is called to enable the interception of the x2apic msrs. In particular it is called at the moment the guest resets its apic. Assuming that the guest's apic was in x2apic mode, the reset will bring it back to the xapic mode. The svm_set_x2apic_msr_interception however has an erroneous check for '!apic_x2apic_mode()' which prevents it from doing anything in this case. As a result of this, all x2apic msrs are left unintercepted, and that exposes the bare metal x2apic (if enabled) to the guest. Oops. Remove the erroneous '!apic_x2apic_mode()' check to fix that. This fixes CVE-2023-5090 Fixes: 4d1d7942e36a ("KVM: SVM: Introduce logic to (de)activate x2AVIC mode") Cc: stable@vger.kernel.org Signed-off-by: Maxim Levitsky Reviewed-by: Suravee Suthikulpanit Tested-by: Suravee Suthikulpanit Reviewed-by: Sean Christopherson Message-Id: <20230928173354.217464-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9507df93f410..acdd0b89e471 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -913,8 +913,7 @@ void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept) if (intercept == svm->x2avic_msrs_intercepted) return; - if (!x2avic_enabled || - !apic_x2apic_mode(svm->vcpu.arch.apic)) + if (!x2avic_enabled) return; for (i = 0; i < MAX_DIRECT_ACCESS_MSRS; i++) { -- cgit v1.2.3 From 2dcf37abf9d3aab7f975002d29fc7c17272def38 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 28 Sep 2023 20:33:52 +0300 Subject: x86: KVM: SVM: add support for Invalid IPI Vector interception In later revisions of AMD's APM, there is a new 'incomplete IPI' exit code: "Invalid IPI Vector - The vector for the specified IPI was set to an illegal value (VEC < 16)" Note that tests on Zen2 machine show that this VM exit doesn't happen and instead AVIC just does nothing. Add support for this exit code by doing nothing, instead of filling the kernel log with errors. Also replace an unthrottled 'pr_err()' if another unknown incomplete IPI exit happens with vcpu_unimpl() (e.g in case AMD adds yet another 'Invalid IPI' exit reason) Cc: Signed-off-by: Maxim Levitsky Reviewed-by: Sean Christopherson Message-Id: <20230928173354.217464-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 1 + arch/x86/kvm/svm/avic.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 19bf955b67e0..3ac0ffc4f3e2 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -268,6 +268,7 @@ enum avic_ipi_failure_cause { AVIC_IPI_FAILURE_TARGET_NOT_RUNNING, AVIC_IPI_FAILURE_INVALID_TARGET, AVIC_IPI_FAILURE_INVALID_BACKING_PAGE, + AVIC_IPI_FAILURE_INVALID_IPI_VECTOR, }; #define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 2092db892d7d..4b74ea91f4e6 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -529,8 +529,11 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu) case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE: WARN_ONCE(1, "Invalid backing page\n"); break; + case AVIC_IPI_FAILURE_INVALID_IPI_VECTOR: + /* Invalid IPI with vector < 16 */ + break; default: - pr_err("Unknown IPI interception\n"); + vcpu_unimpl(vcpu, "Unknown avic incomplete IPI interception\n"); } return 1; -- cgit v1.2.3 From 3fdc6087df3be73a212a81ce5dd6516638568806 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 28 Sep 2023 20:33:53 +0300 Subject: x86: KVM: SVM: refresh AVIC inhibition in svm_leave_nested() svm_leave_nested() similar to a nested VM exit, get the vCPU out of nested mode and thus should end the local inhibition of AVIC on this vCPU. Failure to do so, can lead to hangs on guest reboot. Raise the KVM_REQ_APICV_UPDATE request to refresh the AVIC state of the current vCPU in this case. Fixes: f44509f849fe ("KVM: x86: SVM: allow AVIC to co-exist with a nested guest running") Cc: stable@vger.kernel.org Signed-off-by: Maxim Levitsky Reviewed-by: Sean Christopherson Message-Id: <20230928173354.217464-4-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index dd496c9e5f91..3fea8c47679e 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1253,6 +1253,9 @@ void svm_leave_nested(struct kvm_vcpu *vcpu) nested_svm_uninit_mmu_context(vcpu); vmcb_mark_all_dirty(svm->vmcb); + + if (kvm_apicv_activated(vcpu->kvm)) + kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); } kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); -- cgit v1.2.3 From 3e9346734661cc20bd77aeb43dbf4e5f46a2c16e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 2 Oct 2023 13:28:54 -0500 Subject: KVM: SVM: Fix build error when using -Werror=unused-but-set-variable Commit 916e3e5f26ab ("KVM: SVM: Do not use user return MSR support for virtualized TSC_AUX") introduced a local variable used for the rdmsr() function for the high 32-bits of the MSR value. This variable is not used after being set and triggers a warning or error, when treating warnings as errors, when the unused-but-set-variable flag is set. Mark this variable as __maybe_unused to fix this. Fixes: 916e3e5f26ab ("KVM: SVM: Do not use user return MSR support for virtualized TSC_AUX") Signed-off-by: Tom Lendacky Reviewed-by: Sean Christopherson Message-Id: <0da9874b6e9fcbaaa5edeb345d7e2a7c859fc818.1696271334.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index acdd0b89e471..beea99c8e8e0 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -691,7 +691,7 @@ static int svm_hardware_enable(void) */ if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) { struct sev_es_save_area *hostsa; - u32 msr_hi; + u32 __maybe_unused msr_hi; hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400); -- cgit v1.2.3 From 60197a4631b907b30343e25af702257d92f359cf Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 12 Oct 2023 12:16:17 +0530 Subject: KVM: arm64: pmu: Drop redundant check for non-NULL kvm_pmu_events There is an allocated and valid struct kvm_pmu_events for each cpu on the system via DEFINE_PER_CPU(). Hence there cannot be a NULL pointer accessed via this_cpu_ptr() in the helper kvm_get_pmu_events(). Hence non-NULL check for pmu in such places are redundant and can be dropped. Cc: Marc Zyngier Cc: Oliver Upton Cc: James Morse Cc: Suzuki K Poulose Cc: kvmarm@lists.linux.dev Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231012064617.897346-1-anshuman.khandual@arm.com --- arch/arm64/kvm/pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index 0eea225fd09a..a243934c5568 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -39,7 +39,7 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) { struct kvm_pmu_events *pmu = kvm_get_pmu_events(); - if (!kvm_arm_support_pmu_v3() || !pmu || !kvm_pmu_switch_needed(attr)) + if (!kvm_arm_support_pmu_v3() || !kvm_pmu_switch_needed(attr)) return; if (!attr->exclude_host) @@ -55,7 +55,7 @@ void kvm_clr_pmu_events(u32 clr) { struct kvm_pmu_events *pmu = kvm_get_pmu_events(); - if (!kvm_arm_support_pmu_v3() || !pmu) + if (!kvm_arm_support_pmu_v3()) return; pmu->events_host &= ~clr; -- cgit v1.2.3 From e2145c99b53ec88105c69bbbb577265660d08c69 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Oct 2023 11:25:40 -0400 Subject: KVM: MIPS: fix -Wunused-but-set-variable warning The variable is completely unused, remove it. Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 7b2ac1319d70..467ee6b95ae1 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -592,7 +592,7 @@ static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, gfn_t gfn = gpa >> PAGE_SHIFT; int srcu_idx, err; kvm_pfn_t pfn; - pte_t *ptep, entry, old_pte; + pte_t *ptep, entry; bool writeable; unsigned long prot_bits; unsigned long mmu_seq; @@ -664,7 +664,6 @@ retry: entry = pfn_pte(pfn, __pgprot(prot_bits)); /* Write the PTE */ - old_pte = *ptep; set_pte(ptep, entry); err = 0; -- cgit v1.2.3 From 0fd76865006dfdc3cdc6dade538ca2257a847364 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 12 Oct 2023 13:34:58 +0100 Subject: KVM: arm64: Add nPIR{E0}_EL1 to HFG traps nPIR_EL1 and nPIREO_EL1 are part of the 'reverse polarity' set of bits, set them so that we disable the traps for a guest. Unfortunately, these bits are not yet described in the ARM ARM, but only live in the XML description. Also add them to the NV FGT forwarding infrastructure. Signed-off-by: Joey Gouly Fixes: e930694e6145 ("KVM: arm64: Restructure FGT register switching") Cc: Oliver Upton [maz: add entries to the NV FGT array, commit message update] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231012123459.2820835-2-joey.gouly@arm.com --- arch/arm64/include/asm/kvm_arm.h | 4 ++-- arch/arm64/kvm/emulate-nested.c | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 5882b2415596..1095c6647e96 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -344,14 +344,14 @@ */ #define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51)) #define __HFGRTR_EL2_MASK GENMASK(49, 0) -#define __HFGRTR_EL2_nMASK (GENMASK(55, 54) | BIT(50)) +#define __HFGRTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50)) #define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \ BIT(46) | BIT(42) | BIT(40) | BIT(28) | \ GENMASK(26, 25) | BIT(21) | BIT(18) | \ GENMASK(15, 14) | GENMASK(10, 9) | BIT(2)) #define __HFGWTR_EL2_MASK GENMASK(49, 0) -#define __HFGWTR_EL2_nMASK (GENMASK(55, 54) | BIT(50)) +#define __HFGWTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50)) #define __HFGITR_EL2_RES0 GENMASK(63, 57) #define __HFGITR_EL2_MASK GENMASK(54, 0) diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 9ced1bf0c2b7..ee902ff2a50f 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -977,6 +977,8 @@ enum fg_filter_id { static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = { /* HFGRTR_EL2, HFGWTR_EL2 */ + SR_FGT(SYS_PIR_EL1, HFGxTR, nPIR_EL1, 0), + SR_FGT(SYS_PIRE0_EL1, HFGxTR, nPIRE0_EL1, 0), SR_FGT(SYS_TPIDR2_EL0, HFGxTR, nTPIDR2_EL0, 0), SR_FGT(SYS_SMPRI_EL1, HFGxTR, nSMPRI_EL1, 0), SR_FGT(SYS_ACCDATA_EL1, HFGxTR, nACCDATA_EL1, 0), -- cgit v1.2.3 From 839d90357b7ce6b129045d28ac22bd3a247e2350 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 12 Oct 2023 13:34:59 +0100 Subject: KVM: arm64: POR{E0}_EL1 do not need trap handlers These will not be trapped by KVM, so don't need a handler. Signed-off-by: Joey Gouly Cc: Marc Zyngier Cc: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231012123459.2820835-3-joey.gouly@arm.com --- arch/arm64/kvm/sys_regs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e92ec810d449..0afd6136e275 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2122,8 +2122,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi }, { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, - { SYS_DESC(SYS_PIRE0_EL1), access_vm_reg, reset_unknown, PIRE0_EL1 }, - { SYS_DESC(SYS_PIR_EL1), access_vm_reg, reset_unknown, PIR_EL1 }, + { SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1 }, + { SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1 }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, { SYS_DESC(SYS_LORSA_EL1), trap_loregion }, -- cgit v1.2.3 From e001d1447cd4585d7f23a44ff668ba2bc624badb Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 12 Oct 2023 15:24:17 +0300 Subject: fs: factor out vfs_parse_monolithic_sep() helper Factor out vfs_parse_monolithic_sep() from generic_parse_monolithic(), so filesystems could use it with a custom option separator callback. Acked-by: Christian Brauner Signed-off-by: Amir Goldstein --- fs/fs_context.c | 34 +++++++++++++++++++++++++++++----- include/linux/fs_context.h | 2 ++ 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/fs/fs_context.c b/fs/fs_context.c index a0ad7a0c4680..98589aae5208 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -192,17 +192,19 @@ int vfs_parse_fs_string(struct fs_context *fc, const char *key, EXPORT_SYMBOL(vfs_parse_fs_string); /** - * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data + * vfs_parse_monolithic_sep - Parse key[=val][,key[=val]]* mount data * @fc: The superblock configuration to fill in. * @data: The data to parse + * @sep: callback for separating next option * - * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be - * called from the ->monolithic_mount_data() fs_context operation. + * Parse a blob of data that's in key[=val][,key[=val]]* form with a custom + * option separator callback. * * Returns 0 on success or the error returned by the ->parse_option() fs_context * operation on failure. */ -int generic_parse_monolithic(struct fs_context *fc, void *data) +int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, + char *(*sep)(char **)) { char *options = data, *key; int ret = 0; @@ -214,7 +216,7 @@ int generic_parse_monolithic(struct fs_context *fc, void *data) if (ret) return ret; - while ((key = strsep(&options, ",")) != NULL) { + while ((key = sep(&options)) != NULL) { if (*key) { size_t v_len = 0; char *value = strchr(key, '='); @@ -233,6 +235,28 @@ int generic_parse_monolithic(struct fs_context *fc, void *data) return ret; } +EXPORT_SYMBOL(vfs_parse_monolithic_sep); + +static char *vfs_parse_comma_sep(char **s) +{ + return strsep(s, ","); +} + +/** + * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data + * @fc: The superblock configuration to fill in. + * @data: The data to parse + * + * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be + * called from the ->monolithic_mount_data() fs_context operation. + * + * Returns 0 on success or the error returned by the ->parse_option() fs_context + * operation on failure. + */ +int generic_parse_monolithic(struct fs_context *fc, void *data) +{ + return vfs_parse_monolithic_sep(fc, data, vfs_parse_comma_sep); +} EXPORT_SYMBOL(generic_parse_monolithic); /** diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 96332db693d5..c13e99cbbf81 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -136,6 +136,8 @@ extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param); extern int vfs_parse_fs_string(struct fs_context *fc, const char *key, const char *value, size_t v_size); +int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, + char *(*sep)(char **)); extern int generic_parse_monolithic(struct fs_context *fc, void *data); extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); -- cgit v1.2.3 From c34706acf40b43dd31f67c92c5a95d39666a1eb3 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 12 Oct 2023 16:08:28 +0300 Subject: ovl: fix regression in parsing of mount options with escaped comma Ever since commit 91c77947133f ("ovl: allow filenames with comma"), the following example was legit overlayfs mount options: mount -t overlay overlay -o 'lowerdir=/tmp/a\,b/lower' /mnt The conversion to new mount api moved to using the common helper generic_parse_monolithic() and discarded the specialized ovl_next_opt() option separator. Bring back ovl_next_opt() and use vfs_parse_monolithic_sep() to fix the regression. Reported-by: Ryan Hendrickson Closes: https://lore.kernel.org/r/8da307fb-9318-cf78-8a27-ba5c5a0aef6d@alum.mit.edu/ Fixes: 1784fbc2ed9c ("ovl: port to new mount api") Signed-off-by: Amir Goldstein --- fs/overlayfs/params.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index 95b751507ac8..17c74ef4f089 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -157,6 +157,34 @@ const struct fs_parameter_spec ovl_parameter_spec[] = { {} }; +static char *ovl_next_opt(char **s) +{ + char *sbegin = *s; + char *p; + + if (sbegin == NULL) + return NULL; + + for (p = sbegin; *p; p++) { + if (*p == '\\') { + p++; + if (!*p) + break; + } else if (*p == ',') { + *p = '\0'; + *s = p + 1; + return sbegin; + } + } + *s = NULL; + return sbegin; +} + +static int ovl_parse_monolithic(struct fs_context *fc, void *data) +{ + return vfs_parse_monolithic_sep(fc, data, ovl_next_opt); +} + static ssize_t ovl_parse_param_split_lowerdirs(char *str) { ssize_t nr_layers = 1, nr_colons = 0; @@ -682,6 +710,7 @@ static int ovl_reconfigure(struct fs_context *fc) } static const struct fs_context_operations ovl_context_ops = { + .parse_monolithic = ovl_parse_monolithic, .parse_param = ovl_parse_param, .get_tree = ovl_get_tree, .reconfigure = ovl_reconfigure, -- cgit v1.2.3 From 9404673293b065cbb16b8915530147cac7e80b4d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 22 Aug 2023 13:18:10 +0100 Subject: KVM: arm64: timers: Correctly handle TGE flip with CNTPOFF_EL2 Contrary to common belief, HCR_EL2.TGE has a direct and immediate effect on the way the EL0 physical counter is offset. Flipping TGE from 1 to 0 while at EL2 immediately changes the way the counter compared to the CVAL limit. This means that we cannot directly save/restore the guest's view of CVAL, but that we instead must treat it as if CNTPOFF didn't exist. Only in the world switch, once we figure out that we do have CNTPOFF, can we must the offset back and forth depending on the polarity of TGE. Fixes: 2b4825a86940 ("KVM: arm64: timers: Use CNTPOFF_EL2 to offset the physical timer") Reported-by: Ganapatrao Kulkarni Tested-by: Ganapatrao Kulkarni Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 13 +++--------- arch/arm64/kvm/hyp/vhe/switch.c | 44 +++++++++++++++++++++++++++++++++++++++++ include/kvm/arm_arch_timer.h | 7 +++++++ 3 files changed, 54 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 6dcdae4d38cb..a1e24228aaaa 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -55,11 +55,6 @@ static struct irq_ops arch_timer_irq_ops = { .get_input_level = kvm_arch_timer_get_input_level, }; -static bool has_cntpoff(void) -{ - return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)); -} - static int nr_timers(struct kvm_vcpu *vcpu) { if (!vcpu_has_nv(vcpu)) @@ -180,7 +175,7 @@ u64 kvm_phys_timer_read(void) return timecounter->cc->read(timecounter->cc); } -static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) +void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) { if (vcpu_has_nv(vcpu)) { if (is_hyp_ctxt(vcpu)) { @@ -548,8 +543,7 @@ static void timer_save_state(struct arch_timer_context *ctx) timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); cval = read_sysreg_el0(SYS_CNTP_CVAL); - if (!has_cntpoff()) - cval -= timer_get_offset(ctx); + cval -= timer_get_offset(ctx); timer_set_cval(ctx, cval); @@ -636,8 +630,7 @@ static void timer_restore_state(struct arch_timer_context *ctx) cval = timer_get_cval(ctx); offset = timer_get_offset(ctx); set_cntpoff(offset); - if (!has_cntpoff()) - cval += offset; + cval += offset; write_sysreg_el0(cval, SYS_CNTP_CVAL); isb(); write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 6537f58b1a8c..448b17080d36 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -39,6 +39,26 @@ static void __activate_traps(struct kvm_vcpu *vcpu) ___activate_traps(vcpu); + if (has_cntpoff()) { + struct timer_map map; + + get_timer_map(vcpu, &map); + + /* + * We're entrering the guest. Reload the correct + * values from memory now that TGE is clear. + */ + if (map.direct_ptimer == vcpu_ptimer(vcpu)) + val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); + if (map.direct_ptimer == vcpu_hptimer(vcpu)) + val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2); + + if (map.direct_ptimer) { + write_sysreg_el0(val, SYS_CNTP_CVAL); + isb(); + } + } + val = read_sysreg(cpacr_el1); val |= CPACR_ELx_TTA; val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN | @@ -77,6 +97,30 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + if (has_cntpoff()) { + struct timer_map map; + u64 val, offset; + + get_timer_map(vcpu, &map); + + /* + * We're exiting the guest. Save the latest CVAL value + * to memory and apply the offset now that TGE is set. + */ + val = read_sysreg_el0(SYS_CNTP_CVAL); + if (map.direct_ptimer == vcpu_ptimer(vcpu)) + __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val; + if (map.direct_ptimer == vcpu_hptimer(vcpu)) + __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val; + + offset = read_sysreg_s(SYS_CNTPOFF_EL2); + + if (map.direct_ptimer && offset) { + write_sysreg_el0(val + offset, SYS_CNTP_CVAL); + isb(); + } + } + /* * ARM errata 1165522 and 1530923 require the actual execution of the * above before we can switch to the EL2/EL0 translation regime used by diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index bb3cb005873e..e748bc957d83 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -82,6 +82,8 @@ struct timer_map { struct arch_timer_context *emul_ptimer; }; +void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map); + struct arch_timer_cpu { struct arch_timer_context timers[NR_KVM_TIMERS]; @@ -145,4 +147,9 @@ u64 timer_get_cval(struct arch_timer_context *ctxt); void kvm_timer_cpu_up(void); void kvm_timer_cpu_down(void); +static inline bool has_cntpoff(void) +{ + return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)); +} + #endif -- cgit v1.2.3 From 13cc9ee8f8ed58e563294d87d74a62006be40f21 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 12 Oct 2023 13:09:02 -0400 Subject: cgroup: Fix incorrect css_set_rwsem reference in comment Since commit f0d9a5f17575 ("cgroup: make css_set_rwsem a spinlock and rename it to css_set_lock"), css_set_rwsem has been replaced by css_set_lock. That commit, however, missed the css_set_rwsem reference in include/linux/cgroup-defs.h. Fix that by changing it to css_set_lock as well. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index f1b3151ac30b..265da00a1a8b 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -238,7 +238,7 @@ struct css_set { * Lists running through all tasks using this cgroup group. * mg_tasks lists tasks which belong to this cset but are in the * process of being migrated out or in. Protected by - * css_set_rwsem, but, during migration, once tasks are moved to + * css_set_lock, but, during migration, once tasks are moved to * mg_tasks, it can be read safely while holding cgroup_mutex. */ struct list_head tasks; -- cgit v1.2.3 From bd9e7326b8d512ee724006d4ec06dfbf3096ae9e Mon Sep 17 00:00:00 2001 From: WangJinchao Date: Thu, 12 Oct 2023 15:17:38 +0800 Subject: workqueue: doc: Fix function and sysfs path errors alloc_ordered_queue -> alloc_ordered_workqueue /sys/devices/virtual/WQ_NAME/ -> /sys/devices/virtual/workqueue/WQ_NAME/ Signed-off-by: WangJinchao Signed-off-by: Tejun Heo --- Documentation/core-api/workqueue.rst | 4 ++-- Documentation/translations/zh_CN/core-api/workqueue.rst | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst index 5d7b01aed1fe..0046af06531a 100644 --- a/Documentation/core-api/workqueue.rst +++ b/Documentation/core-api/workqueue.rst @@ -244,7 +244,7 @@ unbound worker-pools and only one work item could be active at any given time thus achieving the same ordering property as ST wq. In the current implementation the above configuration only guarantees -ST behavior within a given NUMA node. Instead ``alloc_ordered_queue()`` should +ST behavior within a given NUMA node. Instead ``alloc_ordered_workqueue()`` should be used to achieve system-wide ST behavior. @@ -390,7 +390,7 @@ The default affinity scope can be changed with the module parameter scope can be changed using ``apply_workqueue_attrs()``. If ``WQ_SYSFS`` is set, the workqueue will have the following affinity scope -related interface files under its ``/sys/devices/virtual/WQ_NAME/`` +related interface files under its ``/sys/devices/virtual/workqueue/WQ_NAME/`` directory. ``affinity_scope`` diff --git a/Documentation/translations/zh_CN/core-api/workqueue.rst b/Documentation/translations/zh_CN/core-api/workqueue.rst index 6c1b5ec31d75..7fac6f75d078 100644 --- a/Documentation/translations/zh_CN/core-api/workqueue.rst +++ b/Documentation/translations/zh_CN/core-api/workqueue.rst @@ -202,7 +202,7 @@ workqueue将自动创建与属性相匹配的后备工作者池。调节并发 同的排序属性。 在目前的实现中,上述配置只保证了特定NUMA节点内的ST行为。相反, -``alloc_ordered_queue()`` 应该被用来实现全系统的ST行为。 +``alloc_ordered_workqueue()`` 应该被用来实现全系统的ST行为。 执行场景示例 -- cgit v1.2.3 From 7b42f401fc6571b6604441789d892d440829e33c Mon Sep 17 00:00:00 2001 From: Zqiang Date: Wed, 11 Oct 2023 16:27:59 +0800 Subject: workqueue: Use the kmem_cache_free() instead of kfree() to release pwq Currently, the kfree() be used for pwq objects allocated with kmem_cache_alloc() in alloc_and_link_pwqs(), this isn't wrong. but usually, use "trace_kmem_cache_alloc/trace_kmem_cache_free" to track memory allocation and free. this commit therefore use kmem_cache_free() instead of kfree() in alloc_and_link_pwqs() and also consistent with release of the pwq in rcu_free_pwq(). Signed-off-by: Zqiang Signed-off-by: Tejun Heo --- kernel/workqueue.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ebe24a5e1435..6f74cab2bd5a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4610,8 +4610,12 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) enomem: if (wq->cpu_pwq) { - for_each_possible_cpu(cpu) - kfree(*per_cpu_ptr(wq->cpu_pwq, cpu)); + for_each_possible_cpu(cpu) { + struct pool_workqueue *pwq = *per_cpu_ptr(wq->cpu_pwq, cpu); + + if (pwq) + kmem_cache_free(pwq_cache, pwq); + } free_percpu(wq->cpu_pwq); wq->cpu_pwq = NULL; } -- cgit v1.2.3 From 8698cb92eeece1e326a4d6a051bcf143037c4d31 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 8 Aug 2023 14:21:30 +0300 Subject: net/mlx5: Perform DMA operations in the right locations The cited patch change mlx5 driver so that during probe DMA operations were performed before pci_enable_device(), and during teardown DMA operations were performed after pci_disable_device(). DMA operations require PCI to be enabled. Hence, The above leads to the following oops in PPC systems[1]. On s390x systems, as reported by Niklas Schnelle, this is a problem because mlx5_pci_init() is where the DMA and coherent mask is set but mlx5_cmd_init() already does a dma_alloc_coherent(). Thus a DMA allocation is done during probe before the correct mask is set. This causes probe to fail initialization of the cmdif SW structs on s390x after that is converted to the common dma-iommu code. This is because on s390x DMA addresses below 4 GiB are reserved on current machines and unlike the old s390x specific DMA API implementation common code enforces DMA masks. Fix it by performing the DMA operations during probe after pci_enable_device() and after the dma mask is set, and during teardown before pci_disable_device(). [1] Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: xt_MASQUERADE nf_conntrack_netlink nfnetlink xfrm_user iptable_nat xt_addrtype xt_conntrack nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 netconsole rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser ib_umad rdma_cm ib_ipoib iw_cm libiscsi scsi_transport_iscsi ib_cm ib_uverbs ib_core mlx5_core(-) ptp pps_core fuse vmx_crypto crc32c_vpmsum [last unloaded: mlx5_ib] CPU: 1 PID: 8937 Comm: modprobe Not tainted 6.5.0-rc3_for_upstream_min_debug_2023_07_31_16_02 #1 Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,HEAD hv:linux,kvm pSeries NIP: c000000000423388 LR: c0000000001e733c CTR: c0000000001e4720 REGS: c0000000055636d0 TRAP: 0380 Not tainted (6.5.0-rc3_for_upstream_min_debug_2023_07_31_16_02) MSR: 8000000000009033 CR: 24008884 XER: 20040000 CFAR: c0000000001e7338 IRQMASK: 0 NIP [c000000000423388] __free_pages+0x28/0x160 LR [c0000000001e733c] dma_direct_free+0xac/0x190 Call Trace: [c000000005563970] [5deadbeef0000100] 0x5deadbeef0000100 (unreliable) [c0000000055639b0] [c0000000003d46cc] kfree+0x7c/0x150 [c000000005563a40] [c0000000001e47c8] dma_free_attrs+0xa8/0x1a0 [c000000005563aa0] [c008000000d0064c] mlx5_cmd_cleanup+0xa4/0x100 [mlx5_core] [c000000005563ad0] [c008000000cf629c] mlx5_mdev_uninit+0xf4/0x140 [mlx5_core] [c000000005563b00] [c008000000cf6448] remove_one+0x160/0x1d0 [mlx5_core] [c000000005563b40] [c000000000958540] pci_device_remove+0x60/0x110 [c000000005563b80] [c000000000a35e80] device_remove+0x70/0xd0 [c000000005563bb0] [c000000000a37a38] device_release_driver_internal+0x2a8/0x330 [c000000005563c00] [c000000000a37b8c] driver_detach+0x8c/0x160 [c000000005563c40] [c000000000a35350] bus_remove_driver+0x90/0x110 [c000000005563c80] [c000000000a38948] driver_unregister+0x48/0x90 [c000000005563cf0] [c000000000957e38] pci_unregister_driver+0x38/0x150 [c000000005563d40] [c008000000eb6140] mlx5_cleanup+0x38/0x90 [mlx5_core] Fixes: 06cd555f73ca ("net/mlx5: split mlx5_cmd_init() to probe and reload routines") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Reviewed-by: Tariq Toukan Reviewed-by: Leon Romanovsky Reviewed-by: Niklas Schnelle Tested-by: Niklas Schnelle Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 64 ++++++++++++--------------- 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index afb348579577..c22b0ad0c870 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -2186,52 +2186,23 @@ static u16 cmdif_rev(struct mlx5_core_dev *dev) int mlx5_cmd_init(struct mlx5_core_dev *dev) { - int size = sizeof(struct mlx5_cmd_prot_block); - int align = roundup_pow_of_two(size); struct mlx5_cmd *cmd = &dev->cmd; - u32 cmd_l; - int err; - - cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); - if (!cmd->pool) - return -ENOMEM; - err = alloc_cmd_page(dev, cmd); - if (err) - goto err_free_pool; - - cmd_l = (u32)(cmd->dma); - if (cmd_l & 0xfff) { - mlx5_core_err(dev, "invalid command queue address\n"); - err = -ENOMEM; - goto err_cmd_page; - } cmd->checksum_disabled = 1; spin_lock_init(&cmd->alloc_lock); spin_lock_init(&cmd->token_lock); - create_msg_cache(dev); - set_wqname(dev); cmd->wq = create_singlethread_workqueue(cmd->wq_name); if (!cmd->wq) { mlx5_core_err(dev, "failed to create command workqueue\n"); - err = -ENOMEM; - goto err_cache; + return -ENOMEM; } mlx5_cmdif_debugfs_init(dev); return 0; - -err_cache: - destroy_msg_cache(dev); -err_cmd_page: - free_cmd_page(dev, cmd); -err_free_pool: - dma_pool_destroy(cmd->pool); - return err; } void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) @@ -2240,15 +2211,15 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) mlx5_cmdif_debugfs_cleanup(dev); destroy_workqueue(cmd->wq); - destroy_msg_cache(dev); - free_cmd_page(dev, cmd); - dma_pool_destroy(cmd->pool); } int mlx5_cmd_enable(struct mlx5_core_dev *dev) { + int size = sizeof(struct mlx5_cmd_prot_block); + int align = roundup_pow_of_two(size); struct mlx5_cmd *cmd = &dev->cmd; u32 cmd_h, cmd_l; + int err; memset(&cmd->vars, 0, sizeof(cmd->vars)); cmd->vars.cmdif_rev = cmdif_rev(dev); @@ -2281,10 +2252,21 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev) sema_init(&cmd->vars.pages_sem, 1); sema_init(&cmd->vars.throttle_sem, DIV_ROUND_UP(cmd->vars.max_reg_cmds, 2)); + cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); + if (!cmd->pool) + return -ENOMEM; + + err = alloc_cmd_page(dev, cmd); + if (err) + goto err_free_pool; + cmd_h = (u32)((u64)(cmd->dma) >> 32); cmd_l = (u32)(cmd->dma); - if (WARN_ON(cmd_l & 0xfff)) - return -EINVAL; + if (cmd_l & 0xfff) { + mlx5_core_err(dev, "invalid command queue address\n"); + err = -ENOMEM; + goto err_cmd_page; + } iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h); iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz); @@ -2297,17 +2279,27 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev) cmd->mode = CMD_MODE_POLLING; cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL; + create_msg_cache(dev); create_debugfs_files(dev); return 0; + +err_cmd_page: + free_cmd_page(dev, cmd); +err_free_pool: + dma_pool_destroy(cmd->pool); + return err; } void mlx5_cmd_disable(struct mlx5_core_dev *dev) { struct mlx5_cmd *cmd = &dev->cmd; - clean_debug_files(dev); flush_workqueue(cmd->wq); + clean_debug_files(dev); + destroy_msg_cache(dev); + free_cmd_page(dev, cmd); + dma_pool_destroy(cmd->pool); } void mlx5_cmd_set_state(struct mlx5_core_dev *dev, -- cgit v1.2.3 From 7624e58a8b3a251e3e5108b32f2183b34453db32 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 27 Aug 2023 13:31:53 +0300 Subject: net/mlx5: E-switch, register event handler before arming the event Currently, mlx5 is registering event handler for vport context change event some time after arming the event. this can lead to missing an event, which will result in wrong rules in the FDB. Hence, register the event handler before arming the event. This solution is valid since FW is sending vport context change event only on vports which SW armed, and SW arming the vport when enabling it, which is done after the FDB has been created. Fixes: 6933a9379559 ("net/mlx5: E-Switch, Use async events chain") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d4cde6555063..8d0b915a3121 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1038,11 +1038,8 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) return ERR_PTR(err); } -static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw) +static void mlx5_eswitch_event_handler_register(struct mlx5_eswitch *esw) { - MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); - mlx5_eq_notifier_register(esw->dev, &esw->nb); - if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) { MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler, ESW_FUNCTIONS_CHANGED); @@ -1050,13 +1047,11 @@ static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw) } } -static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw) +static void mlx5_eswitch_event_handler_unregister(struct mlx5_eswitch *esw) { if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); - mlx5_eq_notifier_unregister(esw->dev, &esw->nb); - flush_workqueue(esw->work_queue); } @@ -1483,6 +1478,9 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) mlx5_eswitch_update_num_of_vfs(esw, num_vfs); + MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->nb); + if (esw->mode == MLX5_ESWITCH_LEGACY) { err = esw_legacy_enable(esw); } else { @@ -1495,7 +1493,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED; - mlx5_eswitch_event_handlers_register(esw); + mlx5_eswitch_event_handler_register(esw); esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", @@ -1622,7 +1620,8 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) */ mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_LEGACY); - mlx5_eswitch_event_handlers_unregister(esw); + mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + mlx5_eswitch_event_handler_unregister(esw); esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", -- cgit v1.2.3 From 7a3ce8074878a68a75ceacec93d9ae05906eec86 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 9 Aug 2023 11:10:57 +0200 Subject: net/mlx5: Bridge, fix peer entry ageing in LAG mode With current implementation in single FDB LAG mode all packets are processed by eswitch 0 rules. As such, 'peer' FDB entries receive the packets for rules of other eswitches and are responsible for updating the main entry by sending SWITCHDEV_FDB_ADD_TO_BRIDGE notification from their background update wq task. However, this introduces a race condition when non-zero eswitch instance decides to delete a FDB entry, sends SWITCHDEV_FDB_DEL_TO_BRIDGE notification, but another eswitch's update task refreshes the same entry concurrently while its async delete work is still pending on the workque. In such case another SWITCHDEV_FDB_ADD_TO_BRIDGE event may be generated and entry will remain stuck in FDB marked as 'offloaded' since no more SWITCHDEV_FDB_DEL_TO_BRIDGE notifications are sent for deleting the peer entries. Fix the issue by synchronously marking deleted entries with MLX5_ESW_BRIDGE_FLAG_DELETED flag and skipping them in background update job. Signed-off-by: Vlad Buslov Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/rep/bridge.c | 11 ++++++++++ .../net/ethernet/mellanox/mlx5/core/esw/bridge.c | 25 +++++++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/esw/bridge.h | 3 +++ .../ethernet/mellanox/mlx5/core/esw/bridge_priv.h | 1 + 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 0fef853eab62..5d128c5b4529 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -467,6 +467,17 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, /* only handle the event on peers */ if (mlx5_esw_bridge_is_local(dev, rep, esw)) break; + + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + /* Mark for deletion to prevent the update wq task from + * spuriously refreshing the entry which would mark it again as + * offloaded in SW bridge. After this fallthrough to regular + * async delete code. + */ + mlx5_esw_bridge_fdb_mark_deleted(dev, vport_num, esw_owner_vhca_id, br_offloads, + fdb_info); fallthrough; case SWITCHDEV_FDB_ADD_TO_DEVICE: case SWITCHDEV_FDB_DEL_TO_DEVICE: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index e36294b7ade2..1b9bc32efd6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -1748,6 +1748,28 @@ void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 entry->lastuse = jiffies; } +void mlx5_esw_bridge_fdb_mark_deleted(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_esw_bridge *bridge; + + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!bridge) + return; + + entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid); + if (!entry) { + esw_debug(br_offloads->esw->dev, + "FDB mark deleted entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n", + fdb_info->addr, fdb_info->vid, vport_num); + return; + } + + entry->flags |= MLX5_ESW_BRIDGE_FLAG_DELETED; +} + void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info) @@ -1810,7 +1832,8 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads) unsigned long lastuse = (unsigned long)mlx5_fc_query_lastuse(entry->ingress_counter); - if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER) + if (entry->flags & (MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER | + MLX5_ESW_BRIDGE_FLAG_DELETED)) continue; if (time_after(lastuse, entry->lastuse)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h index c2c7c70d99eb..d6f539161993 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h @@ -62,6 +62,9 @@ int mlx5_esw_bridge_vport_peer_unlink(struct net_device *br_netdev, u16 vport_nu void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info); +void mlx5_esw_bridge_fdb_mark_deleted(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info); void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h index 4911cc32161b..7c251af566c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h @@ -133,6 +133,7 @@ struct mlx5_esw_bridge_mdb_key { enum { MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0), MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1), + MLX5_ESW_BRIDGE_FLAG_DELETED = BIT(2), }; enum { -- cgit v1.2.3 From 92fd39634541eb0a11bf1bafbc8ba92d6ddb8dba Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Wed, 6 Sep 2023 21:48:30 +0300 Subject: net/mlx5: Handle fw tracer change ownership event based on MTRC Currently, whenever fw issues a change ownership event, the PF that owns the fw tracer drops its ownership directly and the other PFs try to pick up the ownership via what MTRC register suggests. In some cases, driver releases the ownership of the tracer and reacquires it later on. Whenever the driver releases ownership of the tracer, fw issues a change ownership event. This event can be delayed and come after driver has reacquired ownership of the tracer. Thus the late event will trigger the tracer owner PF to release the ownership again and lead to a scenario where no PF is owning the tracer. To prevent the scenario described above, when handling a change ownership event, do not drop ownership of the tracer directly, instead read the fw MTRC register to retrieve the up-to-date owner of the tracer and set it accordingly in driver level. Fixes: f53aaa31cce7 ("net/mlx5: FW tracer, implement tracer logic") Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 7c0f2adbea00..ad789349c06e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -848,7 +848,7 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work) mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner); if (tracer->owner) { - tracer->owner = false; + mlx5_fw_tracer_ownership_acquire(tracer); return; } -- cgit v1.2.3 From be43b7489a3c4702799e50179da69c3df7d6899b Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 2 Oct 2023 14:05:29 +0300 Subject: net/mlx5e: RX, Fix page_pool allocation failure recovery for striding rq When a page allocation fails during refill in mlx5e_post_rx_mpwqes, the page will be released again on the next refill call. This triggers the page_pool negative page fragment count warning below: [ 2436.447717] WARNING: CPU: 1 PID: 2419 at include/net/page_pool/helpers.h:130 mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] ... [ 2436.447895] RIP: 0010:mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 2436.447991] Call Trace: [ 2436.447975] mlx5e_post_rx_mpwqes+0x1d5/0xcf0 [mlx5_core] [ 2436.447994] [ 2436.447996] ? __warn+0x7d/0x120 [ 2436.448009] ? mlx5e_handle_rx_cqe_mpwrq+0x109/0x1d0 [mlx5_core] [ 2436.448002] ? mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 2436.448044] ? mlx5e_poll_rx_cq+0x87/0x6e0 [mlx5_core] [ 2436.448061] ? report_bug+0x155/0x180 [ 2436.448065] ? handle_bug+0x36/0x70 [ 2436.448067] ? exc_invalid_op+0x13/0x60 [ 2436.448070] ? asm_exc_invalid_op+0x16/0x20 [ 2436.448079] mlx5e_napi_poll+0x122/0x6b0 [mlx5_core] [ 2436.448077] ? mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 2436.448113] ? generic_exec_single+0x35/0x100 [ 2436.448117] __napi_poll+0x25/0x1a0 [ 2436.448120] net_rx_action+0x28a/0x300 [ 2436.448122] __do_softirq+0xcd/0x279 [ 2436.448126] irq_exit_rcu+0x6a/0x90 [ 2436.448128] sysvec_apic_timer_interrupt+0x6e/0x90 [ 2436.448130] This patch fixes the striding rq case by setting the skip flag on all the wqe pages that were expected to have new pages allocated. Fixes: 4c2a13236807 ("net/mlx5e: RX, Defer page release in striding rq for better recycling") Tested-by: Chris Mason Reported-by: Chris Mason Closes: https://lore.kernel.org/netdev/117FF31A-7BE0-4050-B2BB-E41F224FF72F@meta.com Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3fd11b0761e0..7988b3a9598c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -816,6 +816,8 @@ err_unmap: mlx5e_page_release_fragmented(rq, frag_page); } + bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); + err: rq->stats->buff_alloc_err++; -- cgit v1.2.3 From ef9369e9c30846f5e052a11ccc70e1f6b8dc557a Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Fri, 29 Sep 2023 17:31:49 +0300 Subject: net/mlx5e: RX, Fix page_pool allocation failure recovery for legacy rq When a page allocation fails during refill in mlx5e_refill_rx_wqes, the page will be released again on the next refill call. This triggers the page_pool negative page fragment count warning below: [ 338.326070] WARNING: CPU: 4 PID: 0 at include/net/page_pool/helpers.h:130 mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] ... [ 338.328993] RIP: 0010:mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 338.329094] Call Trace: [ 338.329097] [ 338.329100] ? __warn+0x7d/0x120 [ 338.329105] ? mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 338.329173] ? report_bug+0x155/0x180 [ 338.329179] ? handle_bug+0x3c/0x60 [ 338.329183] ? exc_invalid_op+0x13/0x60 [ 338.329187] ? asm_exc_invalid_op+0x16/0x20 [ 338.329192] ? mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 338.329259] mlx5e_post_rx_wqes+0x210/0x5a0 [mlx5_core] [ 338.329327] ? mlx5e_poll_rx_cq+0x88/0x6f0 [mlx5_core] [ 338.329394] mlx5e_napi_poll+0x127/0x6b0 [mlx5_core] [ 338.329461] __napi_poll+0x25/0x1a0 [ 338.329465] net_rx_action+0x28a/0x300 [ 338.329468] __do_softirq+0xcd/0x279 [ 338.329473] irq_exit_rcu+0x6a/0x90 [ 338.329477] common_interrupt+0x82/0xa0 [ 338.329482] This patch fixes the legacy rq case by releasing all allocated fragments and then setting the skip flag on all released fragments. It is important to note that the number of released fragments will be higher than the number of allocated fragments when an allocation error occurs. Fixes: 3f93f82988bc ("net/mlx5e: RX, Defer page release in legacy rq for better recycling") Tested-by: Chris Mason Reported-by: Chris Mason Closes: https://lore.kernel.org/netdev/117FF31A-7BE0-4050-B2BB-E41F224FF72F@meta.com Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 33 ++++++++++++++++++------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7988b3a9598c..8d9743a5e42c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -457,26 +457,41 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) static int mlx5e_refill_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) { int remaining = wqe_bulk; - int i = 0; + int total_alloc = 0; + int refill_alloc; + int refill; /* The WQE bulk is split into smaller bulks that are sized * according to the page pool cache refill size to avoid overflowing * the page pool cache due to too many page releases at once. */ do { - int refill = min_t(u16, rq->wqe.info.refill_unit, remaining); - int alloc_count; + refill = min_t(u16, rq->wqe.info.refill_unit, remaining); - mlx5e_free_rx_wqes(rq, ix + i, refill); - alloc_count = mlx5e_alloc_rx_wqes(rq, ix + i, refill); - i += alloc_count; - if (unlikely(alloc_count != refill)) - break; + mlx5e_free_rx_wqes(rq, ix + total_alloc, refill); + refill_alloc = mlx5e_alloc_rx_wqes(rq, ix + total_alloc, refill); + if (unlikely(refill_alloc != refill)) + goto err_free; + total_alloc += refill_alloc; remaining -= refill; } while (remaining); - return i; + return total_alloc; + +err_free: + mlx5e_free_rx_wqes(rq, ix, total_alloc + refill_alloc); + + for (int i = 0; i < total_alloc + refill; i++) { + int j = mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, ix + i); + struct mlx5e_wqe_frag_info *frag; + + frag = get_frag(rq, j); + for (int k = 0; k < rq->wqe.info.num_frags; k++, frag++) + frag->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + } + + return 0; } static void -- cgit v1.2.3 From aaab619ccd07a32e5b29aa7e59b20de1dcc7a29e Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 25 Sep 2023 17:50:18 +0300 Subject: net/mlx5e: XDP, Fix XDP_REDIRECT mpwqe page fragment leaks on shutdown When mlx5e_xdp_xmit is called without the XDP_XMIT_FLUSH set it is possible that it leaves a mpwqe session open. That is ok during runtime: the session will be closed on the next call to mlx5e_xdp_xmit. But having a mpwqe session still open at XDP sq close time is problematic: the pc counter is not updated before flushing the contents of the xdpi_fifo. This results in leaking page fragments. The fix is to always close the mpwqe session at the end of mlx5e_xdp_xmit, regardless of the XDP_XMIT_FLUSH flag being set or not. Fixes: 5e0d2eef771e ("net/mlx5e: XDP, Support Enhanced Multi-Packet TX WQE") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 12f56d0db0af..8bed17d8fe56 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -874,11 +874,11 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, } out: - if (flags & XDP_XMIT_FLUSH) { - if (sq->mpwqe.wqe) - mlx5e_xdp_mpwqe_complete(sq); + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + + if (flags & XDP_XMIT_FLUSH) mlx5e_xmit_xdp_doorbell(sq); - } return nxmit; } -- cgit v1.2.3 From c51c673462a266fb813cf189f8190798a12d3124 Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Tue, 12 Sep 2023 10:06:24 +0300 Subject: net/mlx5e: Take RTNL lock before triggering netdev notifiers Hold RTNL lock when calling xdp_set_features() with a registered netdev, as the call triggers the netdev notifiers. This could happen when switching from nic profile to uplink representor for example. Similar logic which fixed a similar scenario was previously introduced in the following commit: commit 72cc65497065 net/mlx5e: Take RTNL lock when needed before calling xdp_set_features(). This fixes the following assertion and warning call trace: RTNL: assertion failed at net/core/dev.c (1961) WARNING: CPU: 13 PID: 2529 at net/core/dev.c:1961 call_netdevice_notifiers_info+0x7c/0x80 Modules linked in: rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay mlx5_core zram zsmalloc fuse CPU: 13 PID: 2529 Comm: devlink Not tainted 6.5.0_for_upstream_min_debug_2023_09_07_20_04 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:call_netdevice_notifiers_info+0x7c/0x80 Code: 8f ff 80 3d 77 0d 16 01 00 75 c5 ba a9 07 00 00 48 c7 c6 c4 bb 0d 82 48 c7 c7 18 c8 06 82 c6 05 5b 0d 16 01 01 e8 44 f6 8c ff <0f> 0b eb a2 0f 1f 44 00 00 55 48 89 e5 41 54 48 83 e4 f0 48 83 ec RSP: 0018:ffff88819930f7f0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffffffff8309f740 RCX: 0000000000000027 RDX: ffff88885fb5b5c8 RSI: 0000000000000001 RDI: ffff88885fb5b5c0 RBP: 0000000000000028 R08: ffff88887ffabaa8 R09: 0000000000000003 R10: ffff88887fecbac0 R11: ffff88887ff7bac0 R12: ffff88819930f810 R13: ffff88810b7fea40 R14: ffff8881154e8fd8 R15: ffff888107e881a0 FS: 00007f3ad248f800(0000) GS:ffff88885fb40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000563b85f164e0 CR3: 0000000113b5c006 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? __warn+0x79/0x120 ? call_netdevice_notifiers_info+0x7c/0x80 ? report_bug+0x17c/0x190 ? handle_bug+0x3c/0x60 ? exc_invalid_op+0x14/0x70 ? asm_exc_invalid_op+0x16/0x20 ? call_netdevice_notifiers_info+0x7c/0x80 call_netdevice_notifiers+0x2e/0x50 mlx5e_set_xdp_feature+0x21/0x50 [mlx5_core] mlx5e_build_rep_params+0x97/0x130 [mlx5_core] mlx5e_init_ul_rep+0x9f/0x100 [mlx5_core] mlx5e_netdev_init_profile+0x76/0x110 [mlx5_core] mlx5e_netdev_attach_profile+0x1f/0x90 [mlx5_core] mlx5e_netdev_change_profile+0x92/0x160 [mlx5_core] mlx5e_vport_rep_load+0x329/0x4a0 [mlx5_core] mlx5_esw_offloads_rep_load+0x9e/0xf0 [mlx5_core] esw_offloads_enable+0x4bc/0xe90 [mlx5_core] mlx5_eswitch_enable_locked+0x3c8/0x570 [mlx5_core] ? kmalloc_trace+0x25/0x80 mlx5_devlink_eswitch_mode_set+0x224/0x680 [mlx5_core] ? devlink_get_from_attrs_lock+0x9e/0x110 devlink_nl_cmd_eswitch_set_doit+0x60/0xe0 genl_family_rcv_msg_doit+0xd0/0x120 genl_rcv_msg+0x180/0x2b0 ? devlink_get_from_attrs_lock+0x110/0x110 ? devlink_nl_cmd_eswitch_get_doit+0x290/0x290 ? devlink_pernet_pre_exit+0xf0/0xf0 ? genl_family_rcv_msg_dumpit+0xf0/0xf0 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1fc/0x2c0 netlink_sendmsg+0x232/0x4a0 sock_sendmsg+0x38/0x60 ? _copy_from_user+0x2a/0x60 __sys_sendto+0x110/0x160 ? handle_mm_fault+0x161/0x260 ? do_user_addr_fault+0x276/0x620 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f3ad231340a Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89 RSP: 002b:00007ffd70aad4b8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000c36b00 RCX:00007f3ad231340a RDX: 0000000000000038 RSI: 0000000000c36b00 RDI: 0000000000000003 RBP: 0000000000c36910 R08: 00007f3ad2625200 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 ---[ end trace 0000000000000000 ]--- ------------[ cut here ]------------ Fixes: 4d5ab0ad964d ("net/mlx5e: take into account device reconfiguration for xdp_features flag") Signed-off-by: Lama Kayal Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 2fdb8895aecd..5ca9bc337dc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -769,6 +769,7 @@ static int mlx5e_rep_max_nch_limit(struct mlx5_core_dev *mdev) static void mlx5e_build_rep_params(struct net_device *netdev) { + const bool take_rtnl = netdev->reg_state == NETREG_REGISTERED; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; @@ -794,8 +795,15 @@ static void mlx5e_build_rep_params(struct net_device *netdev) /* RQ */ mlx5e_build_rq_params(mdev, params); + /* If netdev is already registered (e.g. move from nic profile to uplink, + * RTNL lock must be held before triggering netdev notifiers. + */ + if (take_rtnl) + rtnl_lock(); /* update XDP supported features */ mlx5e_set_xdp_feature(netdev); + if (take_rtnl) + rtnl_unlock(); /* CQ moderation params */ params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); -- cgit v1.2.3 From 06b4eac9c4beda520b8a4dbbb8e33dba9d1c8fba Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 12 Sep 2023 02:28:47 +0000 Subject: net/mlx5e: Don't offload internal port if filter device is out device In the cited commit, if the routing device is ovs internal port, the out device is set to uplink, and packets go out after encapsulation. If filter device is uplink, it can trigger the following syndrome: mlx5_core 0000:08:00.0: mlx5_cmd_out_err:803:(pid 3966): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0xcdb051), err(-22) Fix this issue by not offloading internal port if filter device is out device. In this case, packets are not forwarded to the root table to be processed, the termination table is used instead to forward them from uplink to uplink. Fixes: 100ad4e2d758 ("net/mlx5e: Offload internal port as encap route device") Signed-off-by: Jianbo Liu Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 1730f6a716ee..b10e40e1a9c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -24,7 +24,8 @@ static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv, route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex); - if (!route_dev || !netif_is_ovs_master(route_dev)) + if (!route_dev || !netif_is_ovs_master(route_dev) || + attr->parse_attr->filter_dev == e->out_dev) goto out; err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex, -- cgit v1.2.3 From 80f1241484dd1b1d4eab1a0211d52ec2bd83e2f1 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Mon, 4 Sep 2023 18:26:47 +0300 Subject: net/mlx5e: Fix VF representors reporting zero counters to "ip -s" command Although vf_vport entry of struct mlx5e_stats is never updated, its values are mistakenly copied to the caller structure in the VF representor .ndo_get_stat_64 callback mlx5e_rep_get_stats(). Remove redundant entry and use the updated one, rep_stats, instead. Fixes: 64b68e369649 ("net/mlx5: Refactor and expand rep vport stat group") Reviewed-by: Patrisious Haddad Signed-off-by: Amir Tzin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 11 ++++++++++- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 5ca9bc337dc6..fd1cce542b68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -701,7 +701,7 @@ mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) /* update HW stats in background for next time */ mlx5e_queue_update_stats(priv); - memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); + mlx5e_stats_copy_rep_stats(stats, &priv->stats.rep_stats); } static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 176fa5976259..477c547dcc04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -484,11 +484,20 @@ struct mlx5e_stats { struct mlx5e_vnic_env_stats vnic; struct mlx5e_vport_stats vport; struct mlx5e_pport_stats pport; - struct rtnl_link_stats64 vf_vport; struct mlx5e_pcie_stats pcie; struct mlx5e_rep_stats rep_stats; }; +static inline void mlx5e_stats_copy_rep_stats(struct rtnl_link_stats64 *vf_vport, + struct mlx5e_rep_stats *rep_stats) +{ + memset(vf_vport, 0, sizeof(*vf_vport)); + vf_vport->rx_packets = rep_stats->vport_rx_packets; + vf_vport->tx_packets = rep_stats->vport_tx_packets; + vf_vport->rx_bytes = rep_stats->vport_rx_bytes; + vf_vport->tx_bytes = rep_stats->vport_tx_bytes; +} + extern mlx5e_stats_grp_t mlx5e_nic_stats_grps[]; unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index c24828b688ac..c8590483ddc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4972,7 +4972,8 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, if (err) return err; - rpriv->prev_vf_vport_stats = priv->stats.vf_vport; + mlx5e_stats_copy_rep_stats(&rpriv->prev_vf_vport_stats, + &priv->stats.rep_stats); break; default: NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall"); @@ -5012,7 +5013,7 @@ void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, u64 dbytes; u64 dpkts; - cur_stats = priv->stats.vf_vport; + mlx5e_stats_copy_rep_stats(&cur_stats, &priv->stats.rep_stats); dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; rpriv->prev_vf_vport_stats = cur_stats; -- cgit v1.2.3 From d35652a5fc9944784f6f50a5c979518ff8dacf61 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 12 Oct 2023 13:04:24 +0300 Subject: x86/alternatives: Disable KASAN in apply_alternatives() Fei has reported that KASAN triggers during apply_alternatives() on a 5-level paging machine: BUG: KASAN: out-of-bounds in rcu_is_watching() Read of size 4 at addr ff110003ee6419a0 by task swapper/0/0 ... __asan_load4() rcu_is_watching() trace_hardirqs_on() text_poke_early() apply_alternatives() ... On machines with 5-level paging, cpu_feature_enabled(X86_FEATURE_LA57) gets patched. It includes KASAN code, where KASAN_SHADOW_START depends on __VIRTUAL_MASK_SHIFT, which is defined with cpu_feature_enabled(). KASAN gets confused when apply_alternatives() patches the KASAN_SHADOW_START users. A test patch that makes KASAN_SHADOW_START static, by replacing __VIRTUAL_MASK_SHIFT with 56, works around the issue. Fix it for real by disabling KASAN while the kernel is patching alternatives. [ mingo: updated the changelog ] Fixes: 6657fca06e3f ("x86/mm: Allow to boot without LA57 if CONFIG_X86_5LEVEL=y") Reported-by: Fei Yang Signed-off-by: Kirill A. Shutemov Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231012100424.1456-1-kirill.shutemov@linux.intel.com --- arch/x86/kernel/alternative.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 517ee01503be..73be3931e4f0 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -403,6 +403,17 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, u8 insn_buff[MAX_PATCH_LEN]; DPRINTK(ALT, "alt table %px, -> %px", start, end); + + /* + * In the case CONFIG_X86_5LEVEL=y, KASAN_SHADOW_START is defined using + * cpu_feature_enabled(X86_FEATURE_LA57) and is therefore patched here. + * During the process, KASAN becomes confused seeing partial LA57 + * conversion and triggers a false-positive out-of-bound report. + * + * Disable KASAN until the patching is complete. + */ + kasan_disable_current(); + /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. @@ -452,6 +463,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, text_poke_early(instr, insn_buff, insn_buff_sz); } + + kasan_enable_current(); } static inline bool is_jcc32(struct insn *insn) -- cgit v1.2.3 From 505b02957e74f0c5c4655647ccb04bdc945d18f6 Mon Sep 17 00:00:00 2001 From: Song Shuai Date: Thu, 14 Sep 2023 17:13:34 +0800 Subject: riscv: Remove duplicate objcopy flag There are two duplicate `-O binary` flags when objcopying from vmlinux to Image/xipImage. RISC-V set `-O binary` flag in both OBJCOPYFLAGS in the top-level riscv Makefile and OBJCOPYFLAGS_* in the boot/Makefile, and the objcopy cmd in Kbuild would join them together. The `-O binary` flag is only needed for objcopying Image, so remove the OBJCOPYFLAGS in the top-level riscv Makefile. Fixes: c0fbcd991860 ("RISC-V: Build flat and compressed kernel images") Signed-off-by: Song Shuai Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230914091334.1458542-1-songshuaishuai@tinylab.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 1329e060c548..b43a6bb7e4dc 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -6,7 +6,6 @@ # for more details. # -OBJCOPYFLAGS := -O binary LDFLAGS_vmlinux := -z norelro ifeq ($(CONFIG_RELOCATABLE),y) LDFLAGS_vmlinux += -shared -Bsymbolic -z notext --emit-relocs -- cgit v1.2.3 From 1d6cd2146c2b58bc91266db1d5d6a5f9632e14c0 Mon Sep 17 00:00:00 2001 From: Chen Jiahao Date: Mon, 25 Sep 2023 10:43:33 +0800 Subject: riscv: kdump: fix crashkernel reserving problem on RISC-V When testing on risc-v QEMU environment with "crashkernel=" parameter enabled, a problem occurred with the following message: [ 0.000000] crashkernel low memory reserved: 0xf8000000 - 0x100000000 (128 MB) [ 0.000000] crashkernel reserved: 0x0000000177e00000 - 0x0000000277e00000 (4096 MB) [ 0.000000] ------------[ cut here ]------------ [ 0.000000] WARNING: CPU: 0 PID: 0 at kernel/resource.c:779 __insert_resource+0x8e/0xd0 [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.6.0-rc2-next-20230920 #1 [ 0.000000] Hardware name: riscv-virtio,qemu (DT) [ 0.000000] epc : __insert_resource+0x8e/0xd0 [ 0.000000] ra : insert_resource+0x28/0x4e [ 0.000000] epc : ffffffff80017344 ra : ffffffff8001742e sp : ffffffff81203db0 [ 0.000000] gp : ffffffff812ece98 tp : ffffffff8120dac0 t0 : ff600001f7ff2b00 [ 0.000000] t1 : 0000000000000000 t2 : 3428203030303030 s0 : ffffffff81203dc0 [ 0.000000] s1 : ffffffff81211e18 a0 : ffffffff81211e18 a1 : ffffffff81289380 [ 0.000000] a2 : 0000000277dfffff a3 : 0000000177e00000 a4 : 0000000177e00000 [ 0.000000] a5 : ffffffff81289380 a6 : 0000000277dfffff a7 : 0000000000000078 [ 0.000000] s2 : ffffffff81289380 s3 : ffffffff80a0bac8 s4 : ff600001f7ff2880 [ 0.000000] s5 : 0000000000000280 s6 : 8000000a00006800 s7 : 000000000000007f [ 0.000000] s8 : 0000000080017038 s9 : 0000000080038ea0 s10: 0000000000000000 [ 0.000000] s11: 0000000000000000 t3 : ffffffff80a0bc00 t4 : ffffffff80a0bc00 [ 0.000000] t5 : ffffffff80a0bbd0 t6 : ffffffff80a0bc00 [ 0.000000] status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003 [ 0.000000] [] __insert_resource+0x8e/0xd0 [ 0.000000] ---[ end trace 0000000000000000 ]--- [ 0.000000] Failed to add a Crash kernel resource at 177e00000 The crashkernel memory has been allocated successfully, whereas it failed to insert into iomem_resource. This is due to the unique reserving logic in risc-v arch specific code, i.e. crashk_res/crashk_low_res will be added into iomem_resource later in init_resources(), which is not aligned with current unified reserving logic in reserve_crashkernel_{generic,low}() and therefore leads to the failure of crashkernel reservation. Removing the arch specific code within #ifdef CONFIG_KEXEC_CORE in init_resources() to fix above problem. Fixes: 31549153088e ("riscv: kdump: use generic interface to simplify crashkernel reservation") Signed-off-by: Chen Jiahao Link: https://lore.kernel.org/r/20230925024333.730964-1-chenjiahao16@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index e600aab116a4..aac853ae4eb7 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -173,19 +173,6 @@ static void __init init_resources(void) if (ret < 0) goto error; -#ifdef CONFIG_KEXEC_CORE - if (crashk_res.start != crashk_res.end) { - ret = add_resource(&iomem_resource, &crashk_res); - if (ret < 0) - goto error; - } - if (crashk_low_res.start != crashk_low_res.end) { - ret = add_resource(&iomem_resource, &crashk_low_res); - if (ret < 0) - goto error; - } -#endif - #ifdef CONFIG_CRASH_DUMP if (elfcorehdr_size > 0) { elfcorehdr_res.start = elfcorehdr_addr; -- cgit v1.2.3 From 07a27665754bf649b5de8e55c655e4d6837406be Mon Sep 17 00:00:00 2001 From: Jiexun Wang Date: Wed, 13 Sep 2023 13:29:40 +0800 Subject: RISC-V: Fix wrong use of CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK If configuration options SOFTIRQ_ON_OWN_STACK and PREEMPT_RT are enabled simultaneously under RISC-V architecture, it will result in a compilation failure: arch/riscv/kernel/irq.c:64:6: error: redefinition of 'do_softirq_own_stack' 64 | void do_softirq_own_stack(void) | ^~~~~~~~~~~~~~~~~~~~ In file included from ./arch/riscv/include/generated/asm/softirq_stack.h:1, from arch/riscv/kernel/irq.c:15: ./include/asm-generic/softirq_stack.h:8:20: note: previous definition of 'do_softirq_own_stack' was here 8 | static inline void do_softirq_own_stack(void) | ^~~~~~~~~~~~~~~~~~~~ After changing CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK to CONFIG_SOFTIRQ_ON_OWN_STACK, compilation can be successful. Fixes: dd69d07a5a6c ("riscv: stack: Support HAVE_SOFTIRQ_ON_OWN_STACK") Reviewed-by: Guo Ren Signed-off-by: Jiexun Wang Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20230913052940.374686-1-wangjiexun@tinylab.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index a8efa053c4a5..9cc0a7669271 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -60,7 +60,7 @@ static void init_irq_stacks(void) } #endif /* CONFIG_VMAP_STACK */ -#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK +#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK void do_softirq_own_stack(void) { #ifdef CONFIG_IRQ_STACKS @@ -92,7 +92,7 @@ void do_softirq_own_stack(void) #endif __do_softirq(); } -#endif /* CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK */ +#endif /* CONFIG_SOFTIRQ_ON_OWN_STACK */ #else static void init_irq_stacks(void) {} -- cgit v1.2.3 From a87e7d3e8832271ecb7d5eaaabc5b49fe25a469b Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 3 Oct 2023 20:24:07 +0200 Subject: riscv: Fix ftrace syscall handling which are now prefixed with __riscv_ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ftrace creates entries for each syscall in the tracefs but has failed since commit 08d0ce30e0e4 ("riscv: Implement syscall wrappers") which prefixes all riscv syscalls with __riscv_. So fix this by implementing arch_syscall_match_sym_name() which allows us to ignore this prefix. And also ignore compat syscalls like x86/arm64 by implementing arch_trace_is_compat_syscall(). Fixes: 08d0ce30e0e4 ("riscv: Implement syscall wrappers") Signed-off-by: Alexandre Ghiti Reviewed-by: Sami Tolvanen Acked-by: Masami Hiramatsu (Google) Tested-by: Björn Töpel Link: https://lore.kernel.org/r/20231003182407.32198-1-alexghiti@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/ftrace.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 740a979171e5..2b2f5df7ef2c 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -31,6 +31,27 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +/* + * Let's do like x86/arm64 and ignore the compat syscalls. + */ +#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS +static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) +{ + return is_compat_task(); +} + +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME +static inline bool arch_syscall_match_sym_name(const char *sym, + const char *name) +{ + /* + * Since all syscall functions have __riscv_ prefix, we must skip it. + * However, as we described above, we decided to ignore compat + * syscalls, so we don't care about __riscv_compat_ prefix here. + */ + return !strcmp(sym + 8, name); +} + struct dyn_arch_ftrace { }; #endif -- cgit v1.2.3 From 3fec323339a4a9801a54e8b282eb571965b67b23 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 6 Oct 2023 10:20:10 +0200 Subject: drivers: perf: Fix panic in riscv SBI mmap support The following panic can happen when mmap is called before the pmu add callback which sets the hardware counter index: this happens for example with the following command `perf record --no-bpf-event -n kill`. [ 99.461486] CPU: 1 PID: 1259 Comm: perf Tainted: G E 6.6.0-rc4ubuntu-defconfig #2 [ 99.461669] Hardware name: riscv-virtio,qemu (DT) [ 99.461748] epc : pmu_sbi_set_scounteren+0x42/0x44 [ 99.462337] ra : smp_call_function_many_cond+0x126/0x5b0 [ 99.462369] epc : ffffffff809f9d24 ra : ffffffff800f93e0 sp : ff60000082153aa0 [ 99.462407] gp : ffffffff82395c98 tp : ff6000009a218040 t0 : ff6000009ab3a4f0 [ 99.462425] t1 : 0000000000000004 t2 : 0000000000000100 s0 : ff60000082153ab0 [ 99.462459] s1 : 0000000000000000 a0 : ff60000098869528 a1 : 0000000000000000 [ 99.462473] a2 : 000000000000001f a3 : 0000000000f00000 a4 : fffffffffffffff8 [ 99.462488] a5 : 00000000000000cc a6 : 0000000000000000 a7 : 0000000000735049 [ 99.462502] s2 : 0000000000000001 s3 : ffffffff809f9ce2 s4 : ff60000098869528 [ 99.462516] s5 : 0000000000000002 s6 : 0000000000000004 s7 : 0000000000000001 [ 99.462530] s8 : ff600003fec98bc0 s9 : ffffffff826c5890 s10: ff600003fecfcde0 [ 99.462544] s11: ff600003fec98bc0 t3 : ffffffff819e2558 t4 : ff1c000004623840 [ 99.462557] t5 : 0000000000000901 t6 : ff6000008feeb890 [ 99.462570] status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003 [ 99.462658] [] pmu_sbi_set_scounteren+0x42/0x44 [ 99.462979] Code: 1060 4785 97bb 00d7 8fd9 9073 1067 6422 0141 8082 (9002) 0013 [ 99.463335] Kernel BUG [#2] To circumvent this, try to enable userspace access to the hardware counter when it is selected in addition to when the event is mapped. And vice-versa when the event is stopped/unmapped. Fixes: cc4c07c89aad ("drivers: perf: Implement perf event mmap support in the SBI backend") Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20231006082010.11963-1-alexghiti@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- drivers/perf/riscv_pmu.c | 3 ++- drivers/perf/riscv_pmu_sbi.c | 16 ++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index 1f9a35f724f5..0dda70e1ef90 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -23,7 +23,8 @@ static bool riscv_perf_user_access(struct perf_event *event) return ((event->attr.type == PERF_TYPE_HARDWARE) || (event->attr.type == PERF_TYPE_HW_CACHE) || (event->attr.type == PERF_TYPE_RAW)) && - !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT); + !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) && + (event->hw.idx != -1); } void arch_perf_update_userpage(struct perf_event *event, diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 9a51053b1f99..96c7f670c8f0 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -510,16 +510,18 @@ static void pmu_sbi_set_scounteren(void *arg) { struct perf_event *event = (struct perf_event *)arg; - csr_write(CSR_SCOUNTEREN, - csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event))); + if (event->hw.idx != -1) + csr_write(CSR_SCOUNTEREN, + csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event))); } static void pmu_sbi_reset_scounteren(void *arg) { struct perf_event *event = (struct perf_event *)arg; - csr_write(CSR_SCOUNTEREN, - csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event))); + if (event->hw.idx != -1) + csr_write(CSR_SCOUNTEREN, + csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event))); } static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) @@ -541,7 +543,8 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) && (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) - pmu_sbi_set_scounteren((void *)event); + on_each_cpu_mask(mm_cpumask(event->owner->mm), + pmu_sbi_set_scounteren, (void *)event, 1); } static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag) @@ -551,7 +554,8 @@ static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag) if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) && (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) - pmu_sbi_reset_scounteren((void *)event); + on_each_cpu_mask(mm_cpumask(event->owner->mm), + pmu_sbi_reset_scounteren, (void *)event, 1); ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0); if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) && -- cgit v1.2.3 From ca10d851b9ad0338c19e8e3089e24d565ebfffd7 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 10 Oct 2023 22:48:42 -0400 Subject: workqueue: Override implicit ordered attribute in workqueue_apply_unbound_cpumask() Commit 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") enabled implicit ordered attribute to be added to WQ_UNBOUND workqueues with max_active of 1. This prevented the changing of attributes to these workqueues leading to fix commit 0a94efb5acbb ("workqueue: implicit ordered attribute should be overridable"). However, workqueue_apply_unbound_cpumask() was not updated at that time. So sysfs changes to wq_unbound_cpumask has no effect on WQ_UNBOUND workqueues with implicit ordered attribute. Since not all WQ_UNBOUND workqueues are visible on sysfs, we are not able to make all the necessary cpumask changes even if we iterates all the workqueue cpumasks in sysfs and changing them one by one. Fix this problem by applying the corresponding change made to apply_workqueue_attrs_locked() in the fix commit to workqueue_apply_unbound_cpumask(). Fixes: 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/workqueue.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6f74cab2bd5a..51177ffe16f1 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -5792,9 +5792,13 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask) list_for_each_entry(wq, &workqueues, list) { if (!(wq->flags & WQ_UNBOUND)) continue; + /* creating multiple pwqs breaks ordering guarantee */ - if (wq->flags & __WQ_ORDERED) - continue; + if (!list_empty(&wq->pwqs)) { + if (wq->flags & __WQ_ORDERED_EXPLICIT) + continue; + wq->flags &= ~__WQ_ORDERED; + } ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask); if (IS_ERR(ctx)) { -- cgit v1.2.3 From 5d9c7a1e3e8e18db8e10c546de648cda2a57be52 Mon Sep 17 00:00:00 2001 From: Lucy Mielke Date: Mon, 9 Oct 2023 19:09:46 +0200 Subject: workqueue: fix -Wformat-truncation in create_worker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling with W=1 emitted the following warning (Compiler: gcc (x86-64, ver. 13.2.1, .config: result of make allyesconfig, "Treat warnings as errors" turned off): kernel/workqueue.c:2188:54: warning: ‘%d’ directive output may be truncated writing between 1 and 10 bytes into a region of size between 5 and 14 [-Wformat-truncation=] kernel/workqueue.c:2188:50: note: directive argument in the range [0, 2147483647] kernel/workqueue.c:2188:17: note: ‘snprintf’ output between 4 and 23 bytes into a destination of size 16 setting "id_buf" to size 23 will silence the warning, since GCC determines snprintf's output to be max. 23 bytes in line 2188. Please let me know if there are any mistakes in my patch! Signed-off-by: Lucy Mielke Signed-off-by: Tejun Heo --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 51177ffe16f1..a3522b70218d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2166,7 +2166,7 @@ static struct worker *create_worker(struct worker_pool *pool) { struct worker *worker; int id; - char id_buf[16]; + char id_buf[23]; /* ID is needed to determine kthread name */ id = ida_alloc(&pool->worker_ida, GFP_KERNEL); -- cgit v1.2.3 From b35726396390eb445668cecdbcd41d0285dbefdf Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 12 Oct 2023 15:54:21 -0700 Subject: Revert "Input: psmouse - add delay when deactivating for SMBus mode" This reverts commit 92e24e0e57f72e06c2df87116557331fd2d4dda2. While the patch itself is correct, it uncovered an issue with fallback to PS/2 mode, where we were leaving psmouse->fast_reconnect handler set to psmouse_smbus_reconnect(), which caused crashes. While discussing various approaches to fix the issue it was noted that this patch ass undesired delay in the "fast" resume path of PS/2 device, and it would be better to actually use "reset_delay" option defined in struct rmi_device_platform_data and have RMI code handle it for SMBus transport as well. So this patch is being reverted to deal with crashes and a better solution will be merged shortly. Reported-by: Thorsten Leemhuis Closes: https://lore.kernel.org/all/ca0109fa-c64b-43c1-a651-75b294d750a1@leemhuis.info/ Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/psmouse-smbus.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/input/mouse/psmouse-smbus.c b/drivers/input/mouse/psmouse-smbus.c index 7b13de979908..2a2459b1b4f2 100644 --- a/drivers/input/mouse/psmouse-smbus.c +++ b/drivers/input/mouse/psmouse-smbus.c @@ -5,7 +5,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include #include #include #include @@ -119,18 +118,13 @@ static psmouse_ret_t psmouse_smbus_process_byte(struct psmouse *psmouse) return PSMOUSE_FULL_PACKET; } -static void psmouse_activate_smbus_mode(struct psmouse_smbus_dev *smbdev) -{ - if (smbdev->need_deactivate) { - psmouse_deactivate(smbdev->psmouse); - /* Give the device time to switch into SMBus mode */ - msleep(30); - } -} - static int psmouse_smbus_reconnect(struct psmouse *psmouse) { - psmouse_activate_smbus_mode(psmouse->private); + struct psmouse_smbus_dev *smbdev = psmouse->private; + + if (smbdev->need_deactivate) + psmouse_deactivate(psmouse); + return 0; } @@ -263,7 +257,8 @@ int psmouse_smbus_init(struct psmouse *psmouse, } } - psmouse_activate_smbus_mode(smbdev); + if (need_deactivate) + psmouse_deactivate(psmouse); psmouse->private = smbdev; psmouse->protocol_handler = psmouse_smbus_process_byte; -- cgit v1.2.3 From f43328357defc0dc9d28dbd06dc3361fd2b22e28 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 6 Oct 2023 10:41:36 +0900 Subject: ksmbd: not allow to open file if delelete on close bit is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cthon test fail with the following error. check for proper open/unlink operation nfsjunk files before unlink: -rwxr-xr-x 1 root root 0 9월 25 11:03 ./nfs2y8Jm9 ./nfs2y8Jm9 open; unlink ret = 0 nfsjunk files after unlink: -rwxr-xr-x 1 root root 0 9월 25 11:03 ./nfs2y8Jm9 data compare ok nfsjunk files after close: ls: cannot access './nfs2y8Jm9': No such file or directory special tests failed Cthon expect to second unlink failure when file is already unlinked. ksmbd can not allow to open file if flags of ksmbd inode is set with S_DEL_ON_CLS flags. Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs_cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index c4b80ab7df74..1c5c39733652 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -106,7 +106,7 @@ int ksmbd_query_inode_status(struct inode *inode) ci = __ksmbd_inode_lookup(inode); if (ci) { ret = KSMBD_INODE_STATUS_OK; - if (ci->m_flags & S_DEL_PENDING) + if (ci->m_flags & (S_DEL_PENDING | S_DEL_ON_CLS)) ret = KSMBD_INODE_STATUS_PENDING_DELETE; atomic_dec(&ci->m_count); } @@ -116,7 +116,7 @@ int ksmbd_query_inode_status(struct inode *inode) bool ksmbd_inode_pending_delete(struct ksmbd_file *fp) { - return (fp->f_ci->m_flags & S_DEL_PENDING); + return (fp->f_ci->m_flags & (S_DEL_PENDING | S_DEL_ON_CLS)); } void ksmbd_set_inode_pending_delete(struct ksmbd_file *fp) -- cgit v1.2.3 From be0f89d4419dc5413a1cf06db3671c9949be0d52 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 9 Oct 2023 23:58:15 +0900 Subject: ksmbd: fix wrong error response status by using set_smb2_rsp_status() set_smb2_rsp_status() after __process_request() sets the wrong error status. This patch resets all iov vectors and sets the error status on clean one. Fixes: e2b76ab8b5c9 ("ksmbd: add support for read compound") Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 898860adf929..87c6401a6007 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -231,11 +231,12 @@ void set_smb2_rsp_status(struct ksmbd_work *work, __le32 err) { struct smb2_hdr *rsp_hdr; - if (work->next_smb2_rcv_hdr_off) - rsp_hdr = ksmbd_resp_buf_next(work); - else - rsp_hdr = smb2_get_msg(work->response_buf); + rsp_hdr = smb2_get_msg(work->response_buf); rsp_hdr->Status = err; + + work->iov_idx = 0; + work->iov_cnt = 0; + work->next_smb2_rcv_hdr_off = 0; smb2_set_err_rsp(work); } -- cgit v1.2.3 From 414849040fcf11d45025b8ae26c9fd91da1465da Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 11 Oct 2023 23:29:49 +0900 Subject: ksmbd: fix Null pointer dereferences in ksmbd_update_fstate() Coverity Scan report the following one. This report is a false alarm. Because fp is never NULL when rc is zero. This patch add null check for fp in ksmbd_update_fstate to make alarm silence. *** CID 1568583: Null pointer dereferences (FORWARD_NULL) /fs/smb/server/smb2pdu.c: 3408 in smb2_open() 3402 path_put(&path); 3403 path_put(&parent_path); 3404 } 3405 ksmbd_revert_fsids(work); 3406 err_out1: 3407 if (!rc) { >>> CID 1568583: Null pointer dereferences (FORWARD_NULL) >>> Passing null pointer "fp" to "ksmbd_update_fstate", which dereferences it. 3408 ksmbd_update_fstate(&work->sess->file_table, fp, FP_INITED); 3409 rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len); 3410 } 3411 if (rc) { 3412 if (rc == -EINVAL) 3413 rsp->hdr.Status = STATUS_INVALID_PARAMETER; Fixes: e2b76ab8b5c9 ("ksmbd: add support for read compound") Reported-by: Coverity Scan Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs_cache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 1c5c39733652..c91eac6514dd 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -603,6 +603,9 @@ err_out: void ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp, unsigned int state) { + if (!fp) + return; + write_lock(&ft->lock); fp->f_state = state; write_unlock(&ft->lock); -- cgit v1.2.3 From 1903e6d0578118e9aab1ee23f4a9de55737d1d05 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 11 Oct 2023 23:30:26 +0900 Subject: ksmbd: fix potential double free on smb2_read_pipe() error path Fix new smatch warnings: fs/smb/server/smb2pdu.c:6131 smb2_read_pipe() error: double free of 'rpc_resp' Fixes: e2b76ab8b5c9 ("ksmbd: add support for read compound") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 87c6401a6007..93262ca3f58a 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -6152,12 +6152,12 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work) memcpy(aux_payload_buf, rpc_resp->payload, rpc_resp->payload_sz); nbytes = rpc_resp->payload_sz; - kvfree(rpc_resp); err = ksmbd_iov_pin_rsp_read(work, (void *)rsp, offsetof(struct smb2_read_rsp, Buffer), aux_payload_buf, nbytes); if (err) goto out; + kvfree(rpc_resp); } else { err = ksmbd_iov_pin_rsp(work, (void *)rsp, offsetof(struct smb2_read_rsp, Buffer)); -- cgit v1.2.3 From de5724ca38fd5e442bae9c1fab31942b6544012d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Oct 2023 10:24:29 +0000 Subject: xfrm: fix a data-race in xfrm_lookup_with_ifid() syzbot complains about a race in xfrm_lookup_with_ifid() [1] When preparing commit 0a9e5794b21e ("xfrm: annotate data-race around use_time") I thought xfrm_lookup_with_ifid() was modifying a still private structure. [1] BUG: KCSAN: data-race in xfrm_lookup_with_ifid / xfrm_lookup_with_ifid write to 0xffff88813ea41108 of 8 bytes by task 8150 on cpu 1: xfrm_lookup_with_ifid+0xce7/0x12d0 net/xfrm/xfrm_policy.c:3218 xfrm_lookup net/xfrm/xfrm_policy.c:3270 [inline] xfrm_lookup_route+0x3b/0x100 net/xfrm/xfrm_policy.c:3281 ip6_dst_lookup_flow+0x98/0xc0 net/ipv6/ip6_output.c:1246 send6+0x241/0x3c0 drivers/net/wireguard/socket.c:139 wg_socket_send_skb_to_peer+0xbd/0x130 drivers/net/wireguard/socket.c:178 wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200 wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline] wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51 process_one_work kernel/workqueue.c:2630 [inline] process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703 worker_thread+0x525/0x730 kernel/workqueue.c:2784 kthread+0x1d7/0x210 kernel/kthread.c:388 ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 write to 0xffff88813ea41108 of 8 bytes by task 15867 on cpu 0: xfrm_lookup_with_ifid+0xce7/0x12d0 net/xfrm/xfrm_policy.c:3218 xfrm_lookup net/xfrm/xfrm_policy.c:3270 [inline] xfrm_lookup_route+0x3b/0x100 net/xfrm/xfrm_policy.c:3281 ip6_dst_lookup_flow+0x98/0xc0 net/ipv6/ip6_output.c:1246 send6+0x241/0x3c0 drivers/net/wireguard/socket.c:139 wg_socket_send_skb_to_peer+0xbd/0x130 drivers/net/wireguard/socket.c:178 wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200 wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline] wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51 process_one_work kernel/workqueue.c:2630 [inline] process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703 worker_thread+0x525/0x730 kernel/workqueue.c:2784 kthread+0x1d7/0x210 kernel/kthread.c:388 ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 value changed: 0x00000000651cd9d1 -> 0x00000000651cd9d2 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 15867 Comm: kworker/u4:58 Not tainted 6.6.0-rc4-syzkaller-00016-g5e62ed3b1c8a #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/06/2023 Workqueue: wg-kex-wg2 wg_packet_handshake_send_worker Fixes: 0a9e5794b21e ("xfrm: annotate data-race around use_time") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Steffen Klassert Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 99df2862bdc9..d24b4d4f620e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3220,7 +3220,7 @@ no_transform: } for (i = 0; i < num_pols; i++) - pols[i]->curlft.use_time = ktime_get_real_seconds(); + WRITE_ONCE(pols[i]->curlft.use_time, ktime_get_real_seconds()); if (num_xfrms < 0) { /* Prohibit the flow */ -- cgit v1.2.3 From d93f3f992780af4a21e6c1ab86946b7c5602f1b9 Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Tue, 10 Oct 2023 18:39:33 +0200 Subject: bonding: Return pointer to data after pull on skb Since 429e3d123d9a ("bonding: Fix extraction of ports from the packet headers"), header offsets used to compute a hash in bond_xmit_hash() are relative to skb->data and not skb->head. If the tail of the header buffer of an skb really needs to be advanced and the operation is successful, the pointer to the data must be returned (and not a pointer to the head of the buffer). Fixes: 429e3d123d9a ("bonding: Fix extraction of ports from the packet headers") Signed-off-by: Jiri Wiesner Acked-by: Jay Vosburgh Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ed7212e61c54..51d47eda1c87 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4023,7 +4023,7 @@ static inline const void *bond_pull_data(struct sk_buff *skb, if (likely(n <= hlen)) return data; else if (skb && likely(pskb_may_pull(skb, n))) - return skb->head; + return skb->data; return NULL; } -- cgit v1.2.3 From 03adc61edad49e1bbecfb53f7ea5d78f398fe368 Mon Sep 17 00:00:00 2001 From: Dan Clash Date: Thu, 12 Oct 2023 14:55:18 -0700 Subject: audit,io_uring: io_uring openat triggers audit reference count underflow An io_uring openat operation can update an audit reference count from multiple threads resulting in the call trace below. A call to io_uring_submit() with a single openat op with a flag of IOSQE_ASYNC results in the following reference count updates. These first part of the system call performs two increments that do not race. do_syscall_64() __do_sys_io_uring_enter() io_submit_sqes() io_openat_prep() __io_openat_prep() getname() getname_flags() /* update 1 (increment) */ __audit_getname() /* update 2 (increment) */ The openat op is queued to an io_uring worker thread which starts the opportunity for a race. The system call exit performs one decrement. do_syscall_64() syscall_exit_to_user_mode() syscall_exit_to_user_mode_prepare() __audit_syscall_exit() audit_reset_context() putname() /* update 3 (decrement) */ The io_uring worker thread performs one increment and two decrements. These updates can race with the system call decrement. io_wqe_worker() io_worker_handle_work() io_wq_submit_work() io_issue_sqe() io_openat() io_openat2() do_filp_open() path_openat() __audit_inode() /* update 4 (increment) */ putname() /* update 5 (decrement) */ __audit_uring_exit() audit_reset_context() putname() /* update 6 (decrement) */ The fix is to change the refcnt member of struct audit_names from int to atomic_t. kernel BUG at fs/namei.c:262! Call Trace: ... ? putname+0x68/0x70 audit_reset_context.part.0.constprop.0+0xe1/0x300 __audit_uring_exit+0xda/0x1c0 io_issue_sqe+0x1f3/0x450 ? lock_timer_base+0x3b/0xd0 io_wq_submit_work+0x8d/0x2b0 ? __try_to_del_timer_sync+0x67/0xa0 io_worker_handle_work+0x17c/0x2b0 io_wqe_worker+0x10a/0x350 Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/MW2PR2101MB1033FFF044A258F84AEAA584F1C9A@MW2PR2101MB1033.namprd21.prod.outlook.com/ Fixes: 5bd2182d58e9 ("audit,io_uring,io-wq: add some basic audit support to io_uring") Signed-off-by: Dan Clash Link: https://lore.kernel.org/r/20231012215518.GA4048@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- fs/namei.c | 9 +++++---- include/linux/fs.h | 2 +- kernel/auditsc.c | 8 ++++---- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 567ee547492b..94565bd7e73f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -188,7 +188,7 @@ getname_flags(const char __user *filename, int flags, int *empty) } } - result->refcnt = 1; + atomic_set(&result->refcnt, 1); /* The empty path is special. */ if (unlikely(!len)) { if (empty) @@ -249,7 +249,7 @@ getname_kernel(const char * filename) memcpy((char *)result->name, filename, len); result->uptr = NULL; result->aname = NULL; - result->refcnt = 1; + atomic_set(&result->refcnt, 1); audit_getname(result); return result; @@ -261,9 +261,10 @@ void putname(struct filename *name) if (IS_ERR(name)) return; - BUG_ON(name->refcnt <= 0); + if (WARN_ON_ONCE(!atomic_read(&name->refcnt))) + return; - if (--name->refcnt > 0) + if (!atomic_dec_and_test(&name->refcnt)) return; if (name->name != name->iname) { diff --git a/include/linux/fs.h b/include/linux/fs.h index b528f063e8ff..4a40823c3c67 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2403,7 +2403,7 @@ struct audit_names; struct filename { const char *name; /* pointer to actual string */ const __user char *uptr; /* original userland pointer */ - int refcnt; + atomic_t refcnt; struct audit_names *aname; const char iname[]; }; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 21d2fa815e78..6f0d6fb6523f 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2212,7 +2212,7 @@ __audit_reusename(const __user char *uptr) if (!n->name) continue; if (n->name->uptr == uptr) { - n->name->refcnt++; + atomic_inc(&n->name->refcnt); return n->name; } } @@ -2241,7 +2241,7 @@ void __audit_getname(struct filename *name) n->name = name; n->name_len = AUDIT_NAME_FULL; name->aname = n; - name->refcnt++; + atomic_inc(&name->refcnt); } static inline int audit_copy_fcaps(struct audit_names *name, @@ -2373,7 +2373,7 @@ out_alloc: return; if (name) { n->name = name; - name->refcnt++; + atomic_inc(&name->refcnt); } out: @@ -2500,7 +2500,7 @@ void __audit_inode_child(struct inode *parent, if (found_parent) { found_child->name = found_parent->name; found_child->name_len = AUDIT_NAME_FULL; - found_child->name->refcnt++; + atomic_inc(&found_child->name->refcnt); } } -- cgit v1.2.3 From 8702cf12e6ba91616a72d684e90357977972991b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 9 Oct 2023 18:38:14 -0700 Subject: tcp: Fix listen() warning with v4-mapped-v6 address. syzbot reported a warning [0] introduced by commit c48ef9c4aed3 ("tcp: Fix bind() regression for v4-mapped-v6 non-wildcard address."). After the cited commit, a v4 socket's address matches the corresponding v4-mapped-v6 tb2 in inet_bind2_bucket_match_addr(), not vice versa. During X.X.X.X -> ::ffff:X.X.X.X order bind()s, the second bind() uses bhash and conflicts properly without checking bhash2 so that we need not check if a v4-mapped-v6 sk matches the corresponding v4 address tb2 in inet_bind2_bucket_match_addr(). However, the repro shows that we need to check that in a no-conflict case. The repro bind()s two sockets to the 2-tuples using SO_REUSEPORT and calls listen() for the first socket: from socket import * s1 = socket() s1.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1) s1.bind(('127.0.0.1', 0)) s2 = socket(AF_INET6) s2.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1) s2.bind(('::ffff:127.0.0.1', s1.getsockname()[1])) s1.listen() The second socket should belong to the first socket's tb2, but the second bind() creates another tb2 bucket because inet_bind2_bucket_find() returns NULL in inet_csk_get_port() as the v4-mapped-v6 sk does not match the corresponding v4 address tb2. bhash2[] -> tb2(::ffff:X.X.X.X) -> tb2(X.X.X.X) Then, listen() for the first socket calls inet_csk_get_port(), where the v4 address matches the v4-mapped-v6 tb2 and WARN_ON() is triggered. To avoid that, we need to check if v4-mapped-v6 sk address matches with the corresponding v4 address tb2 in inet_bind2_bucket_match(). The same checks are needed in inet_bind2_bucket_addr_match() too, so we can move all checks there and call it from inet_bind2_bucket_match(). Note that now tb->family is just an address family of tb->(v6_)?rcv_saddr and not of sockets in the bucket. This could be refactored later by defining tb->rcv_saddr as tb->v6_rcv_saddr.s6_addr32[3] and prepending ::ffff: when creating v4 tb2. [0]: WARNING: CPU: 0 PID: 5049 at net/ipv4/inet_connection_sock.c:587 inet_csk_get_port+0xf96/0x2350 net/ipv4/inet_connection_sock.c:587 Modules linked in: CPU: 0 PID: 5049 Comm: syz-executor288 Not tainted 6.6.0-rc2-syzkaller-00018-g2cf0f7156238 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/04/2023 RIP: 0010:inet_csk_get_port+0xf96/0x2350 net/ipv4/inet_connection_sock.c:587 Code: 7c 24 08 e8 4c b6 8a 01 31 d2 be 88 01 00 00 48 c7 c7 e0 94 ae 8b e8 59 2e a3 f8 2e 2e 2e 31 c0 e9 04 fe ff ff e8 ca 88 d0 f8 <0f> 0b e9 0f f9 ff ff e8 be 88 d0 f8 49 8d 7e 48 e8 65 ca 5a 00 31 RSP: 0018:ffffc90003abfbf0 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff888026429100 RCX: 0000000000000000 RDX: ffff88807edcbb80 RSI: ffffffff88b73d66 RDI: ffff888026c49f38 RBP: ffff888026c49f30 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff9260f200 R13: ffff888026c49880 R14: 0000000000000000 R15: ffff888026429100 FS: 00005555557d5380(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000045ad50 CR3: 0000000025754000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: inet_csk_listen_start+0x155/0x360 net/ipv4/inet_connection_sock.c:1256 __inet_listen_sk+0x1b8/0x5c0 net/ipv4/af_inet.c:217 inet_listen+0x93/0xd0 net/ipv4/af_inet.c:239 __sys_listen+0x194/0x270 net/socket.c:1866 __do_sys_listen net/socket.c:1875 [inline] __se_sys_listen net/socket.c:1873 [inline] __x64_sys_listen+0x53/0x80 net/socket.c:1873 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f3a5bce3af9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 c1 17 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffc1a1c79e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000032 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3a5bce3af9 RDX: 00007f3a5bce3af9 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00007f3a5bd565f0 R08: 0000000000000006 R09: 0000000000000006 R10: 0000000000000006 R11: 0000000000000246 R12: 0000000000000001 R13: 431bde82d7b634db R14: 0000000000000001 R15: 0000000000000001 Fixes: c48ef9c4aed3 ("tcp: Fix bind() regression for v4-mapped-v6 non-wildcard address.") Reported-by: syzbot+71e724675ba3958edb31@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=71e724675ba3958edb31 Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231010013814.70571-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index c32f5e28758b..598c1b114d2c 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -149,8 +149,14 @@ static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2, const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family != tb2->family) - return false; + if (sk->sk_family != tb2->family) { + if (sk->sk_family == AF_INET) + return ipv6_addr_v4mapped(&tb2->v6_rcv_saddr) && + tb2->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr; + + return ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr) && + sk->sk_v6_rcv_saddr.s6_addr32[3] == tb2->rcv_saddr; + } if (sk->sk_family == AF_INET6) return ipv6_addr_equal(&tb2->v6_rcv_saddr, @@ -819,19 +825,7 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb, tb->l3mdev != l3mdev) return false; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family != tb->family) { - if (sk->sk_family == AF_INET) - return ipv6_addr_v4mapped(&tb->v6_rcv_saddr) && - tb->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr; - - return false; - } - - if (sk->sk_family == AF_INET6) - return ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); -#endif - return tb->rcv_saddr == sk->sk_rcv_saddr; + return inet_bind2_bucket_addr_match(tb, sk); } bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net, -- cgit v1.2.3 From dbb13378ba306484214b84304e232723a2aaf10a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 10 Oct 2023 16:21:12 +0300 Subject: selftests: fib_tests: Disable RP filter in multipath list receive test The test relies on the fib:fib_table_lookup trace point being triggered once for each forwarded packet. If RP filter is not disabled, the trace point will be triggered twice for each packet (for source validation and forwarding), potentially masking actual bugs. Fix by explicitly disabling RP filter. Before: # ./fib_tests.sh -t ipv4_mpath_list IPv4 multipath list receive tests TEST: Multipath route hit ratio (1.99) [ OK ] After: # ./fib_tests.sh -t ipv4_mpath_list IPv4 multipath list receive tests TEST: Multipath route hit ratio (.99) [ OK ] Fixes: 8ae9efb859c0 ("selftests: fib_tests: Add multipath list receive tests") Reported-by: kernel test robot Closes: https://lore.kernel.org/netdev/202309191658.c00d8b8-oliver.sang@intel.com/ Tested-by: kernel test robot Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Tested-by: Sriram Yagnaraman Link: https://lore.kernel.org/r/20231010132113.3014691-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_tests.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index e7d2a530618a..0dbb26b4fa4a 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -2437,6 +2437,9 @@ ipv4_mpath_list_test() run_cmd "ip -n ns2 route add 203.0.113.0/24 nexthop via 172.16.201.2 nexthop via 172.16.202.2" run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" set +e local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]') -- cgit v1.2.3 From aa13e5241a8fa32b26d5a6b8b63d04c6357243f4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 10 Oct 2023 16:21:13 +0300 Subject: selftests: fib_tests: Count all trace point invocations The tests rely on the IPv{4,6} FIB trace points being triggered once for each forwarded packet. If receive processing is deferred to the ksoftirqd task these invocations will not be counted and the tests will fail. Fix by specifying the '-a' flag to avoid perf from filtering on the mausezahn task. Before: # ./fib_tests.sh -t ipv4_mpath_list IPv4 multipath list receive tests TEST: Multipath route hit ratio (.68) [FAIL] # ./fib_tests.sh -t ipv6_mpath_list IPv6 multipath list receive tests TEST: Multipath route hit ratio (.27) [FAIL] After: # ./fib_tests.sh -t ipv4_mpath_list IPv4 multipath list receive tests TEST: Multipath route hit ratio (1.00) [ OK ] # ./fib_tests.sh -t ipv6_mpath_list IPv6 multipath list receive tests TEST: Multipath route hit ratio (.99) [ OK ] Fixes: 8ae9efb859c0 ("selftests: fib_tests: Add multipath list receive tests") Reported-by: kernel test robot Closes: https://lore.kernel.org/netdev/202309191658.c00d8b8-oliver.sang@intel.com/ Tested-by: kernel test robot Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Tested-by: Sriram Yagnaraman Link: https://lore.kernel.org/r/20231010132113.3014691-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_tests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 0dbb26b4fa4a..66d0db7a2614 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -2452,7 +2452,7 @@ ipv4_mpath_list_test() # words, the FIB lookup tracepoint needs to be triggered for every # packet. local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) - run_cmd "perf stat -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" + run_cmd "perf stat -a -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) list_rcv_eval $tmp_file $diff @@ -2497,7 +2497,7 @@ ipv6_mpath_list_test() # words, the FIB lookup tracepoint needs to be triggered for every # packet. local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) - run_cmd "perf stat -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" + run_cmd "perf stat -a -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) list_rcv_eval $tmp_file $diff -- cgit v1.2.3 From 61b40cefe51af005c72dbdcf975a3d166c6e6406 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 11 Oct 2023 11:24:19 +0800 Subject: net: dsa: bcm_sf2: Fix possible memory leak in bcm_sf2_mdio_register() In bcm_sf2_mdio_register(), the class_find_device() will call get_device() to increment reference count for priv->master_mii_bus->dev if of_mdio_find_bus() succeeds. If mdiobus_alloc() or mdiobus_register() fails, it will call get_device() twice without decrement reference count for the device. And it is the same if bcm_sf2_mdio_register() succeeds but fails in bcm_sf2_sw_probe(), or if bcm_sf2_sw_probe() succeeds. If the reference count has not decremented to zero, the dev related resource will not be freed. So remove the get_device() in bcm_sf2_mdio_register(), and call put_device() if mdiobus_alloc() or mdiobus_register() fails and in bcm_sf2_mdio_unregister() to solve the issue. And as Simon suggested, unwind from errors for bcm_sf2_mdio_register() and just return 0 if it succeeds to make it cleaner. Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus") Signed-off-by: Jinjie Ruan Suggested-by: Simon Horman Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20231011032419.2423290-1-ruanjinjie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/bcm_sf2.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 72374b066f64..cd1f240c90f3 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -617,17 +617,16 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) dn = of_find_compatible_node(NULL, NULL, "brcm,unimac-mdio"); priv->master_mii_bus = of_mdio_find_bus(dn); if (!priv->master_mii_bus) { - of_node_put(dn); - return -EPROBE_DEFER; + err = -EPROBE_DEFER; + goto err_of_node_put; } - get_device(&priv->master_mii_bus->dev); priv->master_mii_dn = dn; priv->slave_mii_bus = mdiobus_alloc(); if (!priv->slave_mii_bus) { - of_node_put(dn); - return -ENOMEM; + err = -ENOMEM; + goto err_put_master_mii_bus_dev; } priv->slave_mii_bus->priv = priv; @@ -684,11 +683,17 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) } err = mdiobus_register(priv->slave_mii_bus); - if (err && dn) { - mdiobus_free(priv->slave_mii_bus); - of_node_put(dn); - } + if (err && dn) + goto err_free_slave_mii_bus; + return 0; + +err_free_slave_mii_bus: + mdiobus_free(priv->slave_mii_bus); +err_put_master_mii_bus_dev: + put_device(&priv->master_mii_bus->dev); +err_of_node_put: + of_node_put(dn); return err; } @@ -696,6 +701,7 @@ static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv) { mdiobus_unregister(priv->slave_mii_bus); mdiobus_free(priv->slave_mii_bus); + put_device(&priv->master_mii_bus->dev); of_node_put(priv->master_mii_dn); } -- cgit v1.2.3 From 242e34500a32631f85c2b4eb6cb42a368a39e54f Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 10 Oct 2023 13:30:59 -0700 Subject: ice: fix over-shifted variable Since the introduction of the ice driver the code has been double-shifting the RSS enabling field, because the define already has shifts in it and can't have the regular pattern of "a << shiftval & mask" applied. Most places in the code got it right, but one line was still wrong. Fix this one location for easy backports to stable. An in-progress patch fixes the defines to "standard" and will be applied as part of the regular -next process sometime after this one. Fixes: d76a60ba7afb ("ice: Add support for VLANs and offloads") Reviewed-by: Przemek Kitszel CC: stable@vger.kernel.org Signed-off-by: Jesse Brandeburg Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20231010203101.406248-1-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_lib.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 7bf9b7069754..73bbf06a76db 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1201,8 +1201,7 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) & ICE_AQ_VSI_Q_OPT_RSS_LUT_M) | - ((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) & - ICE_AQ_VSI_Q_OPT_RSS_HASH_M); + (hash_type & ICE_AQ_VSI_Q_OPT_RSS_HASH_M); } static void -- cgit v1.2.3 From 419ce133ab928ab5efd7b50b2ef36ddfd4eadbd2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 11 Oct 2023 09:20:55 +0200 Subject: tcp: allow again tcp_disconnect() when threads are waiting As reported by Tom, .NET and applications build on top of it rely on connect(AF_UNSPEC) to async cancel pending I/O operations on TCP socket. The blamed commit below caused a regression, as such cancellation can now fail. As suggested by Eric, this change addresses the problem explicitly causing blocking I/O operation to terminate immediately (with an error) when a concurrent disconnect() is executed. Instead of tracking the number of threads blocked on a given socket, track the number of disconnect() issued on such socket. If such counter changes after a blocking operation releasing and re-acquiring the socket lock, error out the current operation. Fixes: 4faeee0cf8a5 ("tcp: deny tcp_disconnect() when threads are waiting") Reported-by: Tom Deseyn Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1886305 Suggested-by: Eric Dumazet Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/f3b95e47e3dbed840960548aebaa8d954372db41.1697008693.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/chtls/chtls_io.c | 36 +++++++++++++++++----- include/net/sock.h | 10 +++--- net/core/stream.c | 12 +++++--- net/ipv4/af_inet.c | 10 ++++-- net/ipv4/inet_connection_sock.c | 1 - net/ipv4/tcp.c | 16 +++++----- net/ipv4/tcp_bpf.c | 4 +++ net/mptcp/protocol.c | 7 ----- net/tls/tls_main.c | 10 ++++-- net/tls/tls_sw.c | 19 ++++++++---- 10 files changed, 80 insertions(+), 45 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index 5fc64e47568a..d567e42e1760 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -911,7 +911,7 @@ static int csk_wait_memory(struct chtls_dev *cdev, struct sock *sk, long *timeo_p) { DEFINE_WAIT_FUNC(wait, woken_wake_function); - int err = 0; + int ret, err = 0; long current_timeo; long vm_wait = 0; bool noblock; @@ -942,10 +942,13 @@ static int csk_wait_memory(struct chtls_dev *cdev, set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; - sk_wait_event(sk, ¤t_timeo, sk->sk_err || - (sk->sk_shutdown & SEND_SHUTDOWN) || - (csk_mem_free(cdev, sk) && !vm_wait), &wait); + ret = sk_wait_event(sk, ¤t_timeo, sk->sk_err || + (sk->sk_shutdown & SEND_SHUTDOWN) || + (csk_mem_free(cdev, sk) && !vm_wait), + &wait); sk->sk_write_pending--; + if (ret < 0) + goto do_error; if (vm_wait) { vm_wait -= current_timeo; @@ -1348,6 +1351,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int copied = 0; int target; long timeo; + int ret; buffers_freed = 0; @@ -1423,7 +1427,11 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (copied >= target) break; chtls_cleanup_rbuf(sk, copied); - sk_wait_data(sk, &timeo, NULL); + ret = sk_wait_data(sk, &timeo, NULL); + if (ret < 0) { + copied = copied ? : ret; + goto unlock; + } continue; found_ok_skb: if (!skb->len) { @@ -1518,6 +1526,8 @@ skip_copy: if (buffers_freed) chtls_cleanup_rbuf(sk, copied); + +unlock: release_sock(sk); return copied; } @@ -1534,6 +1544,7 @@ static int peekmsg(struct sock *sk, struct msghdr *msg, int copied = 0; size_t avail; /* amount of available data in current skb */ long timeo; + int ret; lock_sock(sk); timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); @@ -1585,7 +1596,12 @@ static int peekmsg(struct sock *sk, struct msghdr *msg, release_sock(sk); lock_sock(sk); } else { - sk_wait_data(sk, &timeo, NULL); + ret = sk_wait_data(sk, &timeo, NULL); + if (ret < 0) { + /* here 'copied' is 0 due to previous checks */ + copied = ret; + break; + } } if (unlikely(peek_seq != tp->copied_seq)) { @@ -1656,6 +1672,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int copied = 0; long timeo; int target; /* Read at least this many bytes */ + int ret; buffers_freed = 0; @@ -1747,7 +1764,11 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (copied >= target) break; chtls_cleanup_rbuf(sk, copied); - sk_wait_data(sk, &timeo, NULL); + ret = sk_wait_data(sk, &timeo, NULL); + if (ret < 0) { + copied = copied ? : ret; + goto unlock; + } continue; found_ok_skb: @@ -1816,6 +1837,7 @@ skip_copy: if (buffers_freed) chtls_cleanup_rbuf(sk, copied); +unlock: release_sock(sk); return copied; } diff --git a/include/net/sock.h b/include/net/sock.h index b770261fbdaf..92f7ea62a915 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -336,7 +336,7 @@ struct sk_filter; * @sk_cgrp_data: cgroup data for this cgroup * @sk_memcg: this socket's memory cgroup association * @sk_write_pending: a write to stream socket waits to start - * @sk_wait_pending: number of threads blocked on this socket + * @sk_disconnects: number of disconnect operations performed on this sock * @sk_state_change: callback to indicate change in the state of the sock * @sk_data_ready: callback to indicate there is data to be processed * @sk_write_space: callback to indicate there is bf sending space available @@ -429,7 +429,7 @@ struct sock { unsigned int sk_napi_id; #endif int sk_rcvbuf; - int sk_wait_pending; + int sk_disconnects; struct sk_filter __rcu *sk_filter; union { @@ -1189,8 +1189,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) } #define sk_wait_event(__sk, __timeo, __condition, __wait) \ - ({ int __rc; \ - __sk->sk_wait_pending++; \ + ({ int __rc, __dis = __sk->sk_disconnects; \ release_sock(__sk); \ __rc = __condition; \ if (!__rc) { \ @@ -1200,8 +1199,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) } \ sched_annotate_sleep(); \ lock_sock(__sk); \ - __sk->sk_wait_pending--; \ - __rc = __condition; \ + __rc = __dis == __sk->sk_disconnects ? __condition : -EPIPE; \ __rc; \ }) diff --git a/net/core/stream.c b/net/core/stream.c index f5c4e47df165..96fbcb9bbb30 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -117,7 +117,7 @@ EXPORT_SYMBOL(sk_stream_wait_close); */ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) { - int err = 0; + int ret, err = 0; long vm_wait = 0; long current_timeo = *timeo_p; DEFINE_WAIT_FUNC(wait, woken_wake_function); @@ -142,11 +142,13 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; - sk_wait_event(sk, ¤t_timeo, READ_ONCE(sk->sk_err) || - (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) || - (sk_stream_memory_free(sk) && - !vm_wait), &wait); + ret = sk_wait_event(sk, ¤t_timeo, READ_ONCE(sk->sk_err) || + (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) || + (sk_stream_memory_free(sk) && !vm_wait), + &wait); sk->sk_write_pending--; + if (ret < 0) + goto do_error; if (vm_wait) { vm_wait -= current_timeo; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3d2e30e20473..2713c9b06c4c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -597,7 +597,6 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) add_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending += writebias; - sk->sk_wait_pending++; /* Basic assumption: if someone sets sk->sk_err, he _must_ * change state of the socket from TCP_SYN_*. @@ -613,7 +612,6 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) } remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending -= writebias; - sk->sk_wait_pending--; return timeo; } @@ -642,6 +640,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, return -EINVAL; if (uaddr->sa_family == AF_UNSPEC) { + sk->sk_disconnects++; err = sk->sk_prot->disconnect(sk, flags); sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; goto out; @@ -696,6 +695,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int writebias = (sk->sk_protocol == IPPROTO_TCP) && tcp_sk(sk)->fastopen_req && tcp_sk(sk)->fastopen_req->data ? 1 : 0; + int dis = sk->sk_disconnects; /* Error code is set above */ if (!timeo || !inet_wait_for_connect(sk, timeo, writebias)) @@ -704,6 +704,11 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, err = sock_intr_errno(timeo); if (signal_pending(current)) goto out; + + if (dis != sk->sk_disconnects) { + err = -EPIPE; + goto out; + } } /* Connection was closed by RST, timeout, ICMP error @@ -725,6 +730,7 @@ out: sock_error: err = sock_error(sk) ? : -ECONNABORTED; sock->state = SS_UNCONNECTED; + sk->sk_disconnects++; if (sk->sk_prot->disconnect(sk, flags)) sock->state = SS_DISCONNECTING; goto out; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index aeebe8816689..394a498c2823 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1145,7 +1145,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, if (newsk) { struct inet_connection_sock *newicsk = inet_csk(newsk); - newsk->sk_wait_pending = 0; inet_sk_set_state(newsk, TCP_SYN_RECV); newicsk->icsk_bind_hash = NULL; newicsk->icsk_bind2_hash = NULL; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3f66cdeef7de..d3456cf840de 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -831,7 +831,9 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, */ if (!skb_queue_empty(&sk->sk_receive_queue)) break; - sk_wait_data(sk, &timeo, NULL); + ret = sk_wait_data(sk, &timeo, NULL); + if (ret < 0) + break; if (signal_pending(current)) { ret = sock_intr_errno(timeo); break; @@ -2442,7 +2444,11 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, __sk_flush_backlog(sk); } else { tcp_cleanup_rbuf(sk, copied); - sk_wait_data(sk, &timeo, last); + err = sk_wait_data(sk, &timeo, last); + if (err < 0) { + err = copied ? : err; + goto out; + } } if ((flags & MSG_PEEK) && @@ -2966,12 +2972,6 @@ int tcp_disconnect(struct sock *sk, int flags) int old_state = sk->sk_state; u32 seq; - /* Deny disconnect if other threads are blocked in sk_wait_event() - * or inet_wait_for_connect(). - */ - if (sk->sk_wait_pending) - return -EBUSY; - if (old_state != TCP_CLOSE) tcp_set_state(sk, TCP_CLOSE); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 327268203001..ba2e92188124 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -307,6 +307,8 @@ msg_bytes_ready: } data = tcp_msg_wait_data(sk, psock, timeo); + if (data < 0) + return data; if (data && !sk_psock_queue_empty(psock)) goto msg_bytes_ready; copied = -EAGAIN; @@ -351,6 +353,8 @@ msg_bytes_ready: timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); data = tcp_msg_wait_data(sk, psock, timeo); + if (data < 0) + return data; if (data) { if (!sk_psock_queue_empty(psock)) goto msg_bytes_ready; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c3b83cb390d9..d1902373c974 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3098,12 +3098,6 @@ static int mptcp_disconnect(struct sock *sk, int flags) { struct mptcp_sock *msk = mptcp_sk(sk); - /* Deny disconnect if other threads are blocked in sk_wait_event() - * or inet_wait_for_connect(). - */ - if (sk->sk_wait_pending) - return -EBUSY; - /* We are on the fastopen error path. We can't call straight into the * subflows cleanup code due to lock nesting (we are already under * msk->firstsocket lock). @@ -3173,7 +3167,6 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk); #endif - nsk->sk_wait_pending = 0; __mptcp_init_sock(nsk); msk = mptcp_sk(nsk); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 02f583ff9239..002483e60c19 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -139,8 +139,8 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx) int wait_on_pending_writer(struct sock *sk, long *timeo) { - int rc = 0; DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret, rc = 0; add_wait_queue(sk_sleep(sk), &wait); while (1) { @@ -154,9 +154,13 @@ int wait_on_pending_writer(struct sock *sk, long *timeo) break; } - if (sk_wait_event(sk, timeo, - !READ_ONCE(sk->sk_write_pending), &wait)) + ret = sk_wait_event(sk, timeo, + !READ_ONCE(sk->sk_write_pending), &wait); + if (ret) { + if (ret < 0) + rc = ret; break; + } } remove_wait_queue(sk_sleep(sk), &wait); return rc; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d1fc295b83b5..e9d1e83a859d 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1291,6 +1291,7 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret = 0; long timeo; timeo = sock_rcvtimeo(sk, nonblock); @@ -1302,6 +1303,9 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, if (sk->sk_err) return sock_error(sk); + if (ret < 0) + return ret; + if (!skb_queue_empty(&sk->sk_receive_queue)) { tls_strp_check_rcv(&ctx->strp); if (tls_strp_msg_ready(ctx)) @@ -1320,10 +1324,10 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, released = true; add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - sk_wait_event(sk, &timeo, - tls_strp_msg_ready(ctx) || - !sk_psock_queue_empty(psock), - &wait); + ret = sk_wait_event(sk, &timeo, + tls_strp_msg_ready(ctx) || + !sk_psock_queue_empty(psock), + &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); remove_wait_queue(sk_sleep(sk), &wait); @@ -1852,6 +1856,7 @@ static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx, bool nonblock) { long timeo; + int ret; timeo = sock_rcvtimeo(sk, nonblock); @@ -1861,14 +1866,16 @@ static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx, ctx->reader_contended = 1; add_wait_queue(&ctx->wq, &wait); - sk_wait_event(sk, &timeo, - !READ_ONCE(ctx->reader_present), &wait); + ret = sk_wait_event(sk, &timeo, + !READ_ONCE(ctx->reader_present), &wait); remove_wait_queue(&ctx->wq, &wait); if (timeo <= 0) return -EAGAIN; if (signal_pending(current)) return sock_intr_errno(timeo); + if (ret < 0) + return ret; } WRITE_ONCE(ctx->reader_present, 1); -- cgit v1.2.3 From c68681ae46eaaa1640b52fe366d21a93b2185df5 Mon Sep 17 00:00:00 2001 From: Albert Huang Date: Wed, 11 Oct 2023 15:48:51 +0800 Subject: net/smc: fix smc clc failed issue when netdevice not in init_net If the netdevice is within a container and communicates externally through network technologies such as VxLAN, we won't be able to find routing information in the init_net namespace. To address this issue, we need to add a struct net parameter to the smc_ib_find_route function. This allow us to locate the routing information within the corresponding net namespace, ensuring the correct completion of the SMC CLC interaction. Fixes: e5c4744cfb59 ("net/smc: add SMC-Rv2 connection establishment") Signed-off-by: Albert Huang Reviewed-by: Dust Li Reviewed-by: Wenjia Zhang Link: https://lore.kernel.org/r/20231011074851.95280-1-huangjie.albert@bytedance.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 3 ++- net/smc/smc_ib.c | 7 ++++--- net/smc/smc_ib.h | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index bacdd971615e..7a874da90c7f 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1201,6 +1201,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc, (struct smc_clc_msg_accept_confirm_v2 *)aclc; struct smc_clc_first_contact_ext *fce = smc_get_clc_first_contact_ext(clc_v2, false); + struct net *net = sock_net(&smc->sk); int rc; if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1) @@ -1210,7 +1211,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc, memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN); ini->smcrv2.uses_gateway = false; } else { - if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr, + if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr, smc_ib_gid_to_ipv4(aclc->r0.lcl.gid), ini->smcrv2.nexthop_mac, &ini->smcrv2.uses_gateway)) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 9b66d6aeeb1a..89981dbe46c9 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -193,7 +193,7 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; } -int smc_ib_find_route(__be32 saddr, __be32 daddr, +int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr, u8 nexthop_mac[], u8 *uses_gateway) { struct neighbour *neigh = NULL; @@ -205,7 +205,7 @@ int smc_ib_find_route(__be32 saddr, __be32 daddr, if (daddr == cpu_to_be32(INADDR_NONE)) goto out; - rt = ip_route_output_flow(&init_net, &fl4, NULL); + rt = ip_route_output_flow(net, &fl4, NULL); if (IS_ERR(rt)) goto out; if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET) @@ -235,6 +235,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev, if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP && smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) { struct in_device *in_dev = __in_dev_get_rcu(ndev); + struct net *net = dev_net(ndev); const struct in_ifaddr *ifa; bool subnet_match = false; @@ -248,7 +249,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev, } if (!subnet_match) goto out; - if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr, + if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr, smcrv2->daddr, smcrv2->nexthop_mac, &smcrv2->uses_gateway)) diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 4df5f8c8a0a1..ef8ac2b7546d 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -112,7 +112,7 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk, int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, unsigned short vlan_id, u8 gid[], u8 *sgid_index, struct smc_init_info_smcrv2 *smcrv2); -int smc_ib_find_route(__be32 saddr, __be32 daddr, +int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr, u8 nexthop_mac[], u8 *uses_gateway); bool smc_ib_is_valid_local_systemid(void); int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb); -- cgit v1.2.3 From 0f4d44f6ee048818abf80c69b6bc48927c178abe Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Oct 2023 13:58:11 +0200 Subject: netlink: specs: devlink: fix reply command values Make sure that the command values used for replies are correct. This is only affecting generated userspace helpers, no change on kernel code. Fixes: 7199c86247e9 ("netlink: specs: devlink: add commands that do per-instance dump") Signed-off-by: Jiri Pirko Link: https://lore.kernel.org/r/20231012115811.298129-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/devlink.yaml | 18 +++++------ tools/net/ynl/generated/devlink-user.c | 54 ++++++++++++++++---------------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index d1ebcd927149..065661acb878 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -323,7 +323,7 @@ operations: - dev-name - sb-index reply: &sb-get-reply - value: 11 + value: 13 attributes: *sb-id-attrs dump: request: @@ -350,7 +350,7 @@ operations: - sb-index - sb-pool-index reply: &sb-pool-get-reply - value: 15 + value: 17 attributes: *sb-pool-id-attrs dump: request: @@ -378,7 +378,7 @@ operations: - sb-index - sb-pool-index reply: &sb-port-pool-get-reply - value: 19 + value: 21 attributes: *sb-port-pool-id-attrs dump: request: @@ -407,7 +407,7 @@ operations: - sb-pool-type - sb-tc-index reply: &sb-tc-pool-bind-get-reply - value: 23 + value: 25 attributes: *sb-tc-pool-bind-id-attrs dump: request: @@ -538,7 +538,7 @@ operations: - dev-name - trap-name reply: &trap-get-reply - value: 61 + value: 63 attributes: *trap-id-attrs dump: request: @@ -564,7 +564,7 @@ operations: - dev-name - trap-group-name reply: &trap-group-get-reply - value: 65 + value: 67 attributes: *trap-group-id-attrs dump: request: @@ -590,7 +590,7 @@ operations: - dev-name - trap-policer-id reply: &trap-policer-get-reply - value: 69 + value: 71 attributes: *trap-policer-id-attrs dump: request: @@ -617,7 +617,7 @@ operations: - port-index - rate-node-name reply: &rate-get-reply - value: 74 + value: 76 attributes: *rate-id-attrs dump: request: @@ -643,7 +643,7 @@ operations: - dev-name - linecard-index reply: &linecard-get-reply - value: 78 + value: 80 attributes: *linecard-id-attrs dump: request: diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c index 3a8d8499fab6..2cb2518500cb 100644 --- a/tools/net/ynl/generated/devlink-user.c +++ b/tools/net/ynl/generated/devlink-user.c @@ -16,19 +16,19 @@ static const char * const devlink_op_strmap[] = { [3] = "get", [7] = "port-get", - [DEVLINK_CMD_SB_GET] = "sb-get", - [DEVLINK_CMD_SB_POOL_GET] = "sb-pool-get", - [DEVLINK_CMD_SB_PORT_POOL_GET] = "sb-port-pool-get", - [DEVLINK_CMD_SB_TC_POOL_BIND_GET] = "sb-tc-pool-bind-get", + [13] = "sb-get", + [17] = "sb-pool-get", + [21] = "sb-port-pool-get", + [25] = "sb-tc-pool-bind-get", [DEVLINK_CMD_PARAM_GET] = "param-get", [DEVLINK_CMD_REGION_GET] = "region-get", [DEVLINK_CMD_INFO_GET] = "info-get", [DEVLINK_CMD_HEALTH_REPORTER_GET] = "health-reporter-get", - [DEVLINK_CMD_TRAP_GET] = "trap-get", - [DEVLINK_CMD_TRAP_GROUP_GET] = "trap-group-get", - [DEVLINK_CMD_TRAP_POLICER_GET] = "trap-policer-get", - [DEVLINK_CMD_RATE_GET] = "rate-get", - [DEVLINK_CMD_LINECARD_GET] = "linecard-get", + [63] = "trap-get", + [67] = "trap-group-get", + [71] = "trap-policer-get", + [76] = "rate-get", + [80] = "linecard-get", [DEVLINK_CMD_SELFTESTS_GET] = "selftests-get", }; @@ -838,7 +838,7 @@ devlink_sb_get(struct ynl_sock *ys, struct devlink_sb_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_sb_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SB_GET; + yrs.rsp_cmd = 13; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -876,7 +876,7 @@ devlink_sb_get_dump(struct ynl_sock *ys, struct devlink_sb_get_req_dump *req) yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_sb_get_list); yds.cb = devlink_sb_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SB_GET; + yds.rsp_cmd = 13; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_GET, 1); @@ -987,7 +987,7 @@ devlink_sb_pool_get(struct ynl_sock *ys, struct devlink_sb_pool_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_sb_pool_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SB_POOL_GET; + yrs.rsp_cmd = 17; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -1026,7 +1026,7 @@ devlink_sb_pool_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_sb_pool_get_list); yds.cb = devlink_sb_pool_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SB_POOL_GET; + yds.rsp_cmd = 17; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_POOL_GET, 1); @@ -1147,7 +1147,7 @@ devlink_sb_port_pool_get(struct ynl_sock *ys, rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_sb_port_pool_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SB_PORT_POOL_GET; + yrs.rsp_cmd = 21; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -1187,7 +1187,7 @@ devlink_sb_port_pool_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_sb_port_pool_get_list); yds.cb = devlink_sb_port_pool_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SB_PORT_POOL_GET; + yds.rsp_cmd = 21; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_PORT_POOL_GET, 1); @@ -1316,7 +1316,7 @@ devlink_sb_tc_pool_bind_get(struct ynl_sock *ys, rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_sb_tc_pool_bind_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET; + yrs.rsp_cmd = 25; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -1356,7 +1356,7 @@ devlink_sb_tc_pool_bind_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_sb_tc_pool_bind_get_list); yds.cb = devlink_sb_tc_pool_bind_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET; + yds.rsp_cmd = 25; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_TC_POOL_BIND_GET, 1); @@ -2183,7 +2183,7 @@ devlink_trap_get(struct ynl_sock *ys, struct devlink_trap_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_trap_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_TRAP_GET; + yrs.rsp_cmd = 63; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2223,7 +2223,7 @@ devlink_trap_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_trap_get_list); yds.cb = devlink_trap_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_TRAP_GET; + yds.rsp_cmd = 63; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_GET, 1); @@ -2336,7 +2336,7 @@ devlink_trap_group_get(struct ynl_sock *ys, rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_trap_group_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_TRAP_GROUP_GET; + yrs.rsp_cmd = 67; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2376,7 +2376,7 @@ devlink_trap_group_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_trap_group_get_list); yds.cb = devlink_trap_group_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_TRAP_GROUP_GET; + yds.rsp_cmd = 67; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_GROUP_GET, 1); @@ -2483,7 +2483,7 @@ devlink_trap_policer_get(struct ynl_sock *ys, rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_trap_policer_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_TRAP_POLICER_GET; + yrs.rsp_cmd = 71; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2523,7 +2523,7 @@ devlink_trap_policer_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_trap_policer_get_list); yds.cb = devlink_trap_policer_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_TRAP_POLICER_GET; + yds.rsp_cmd = 71; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_POLICER_GET, 1); @@ -2642,7 +2642,7 @@ devlink_rate_get(struct ynl_sock *ys, struct devlink_rate_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_rate_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_RATE_GET; + yrs.rsp_cmd = 76; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2682,7 +2682,7 @@ devlink_rate_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_rate_get_list); yds.cb = devlink_rate_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_RATE_GET; + yds.rsp_cmd = 76; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_RATE_GET, 1); @@ -2786,7 +2786,7 @@ devlink_linecard_get(struct ynl_sock *ys, struct devlink_linecard_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_linecard_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_LINECARD_GET; + yrs.rsp_cmd = 80; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2825,7 +2825,7 @@ devlink_linecard_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_linecard_get_list); yds.cb = devlink_linecard_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_LINECARD_GET; + yds.rsp_cmd = 80; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_LINECARD_GET, 1); -- cgit v1.2.3 From a258c804aa8742763dce694b5e992d7ccf4294f2 Mon Sep 17 00:00:00 2001 From: Mateusz Polchlopek Date: Thu, 12 Oct 2023 08:31:44 -0400 Subject: docs: fix info about representor identification Update the "How are representors identified?" documentation subchapter. For newer kernels driver should use SET_NETDEV_DEVLINK_PORT instead of ndo_get_devlink_port() callback. Fixes: 7712b3e966ea ("Merge branch 'net-fix-netdev-to-devlink_port-linkage-and-expose-to-user'") Signed-off-by: Mateusz Polchlopek Reviewed-by: Wojciech Drewek Reviewed-by: Przemek Kitszel Reviewed-by: Edward Cree Link: https://lore.kernel.org/r/20231012123144.15768-1-mateusz.polchlopek@intel.com Signed-off-by: Jakub Kicinski --- Documentation/networking/representors.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/representors.rst b/Documentation/networking/representors.rst index ee1f5cd54496..decb39c19b9e 100644 --- a/Documentation/networking/representors.rst +++ b/Documentation/networking/representors.rst @@ -162,9 +162,11 @@ How are representors identified? The representor netdevice should *not* directly refer to a PCIe device (e.g. through ``net_dev->dev.parent`` / ``SET_NETDEV_DEV()``), either of the representee or of the switchdev function. -Instead, it should implement the ``ndo_get_devlink_port()`` netdevice op, which -the kernel uses to provide the ``phys_switch_id`` and ``phys_port_name`` sysfs -nodes. (Some legacy drivers implement ``ndo_get_port_parent_id()`` and +Instead, the driver should use the ``SET_NETDEV_DEVLINK_PORT`` macro to +assign a devlink port instance to the netdevice before registering the +netdevice; the kernel uses the devlink port to provide the ``phys_switch_id`` +and ``phys_port_name`` sysfs nodes. +(Some legacy drivers implement ``ndo_get_port_parent_id()`` and ``ndo_get_phys_port_name()`` directly, but this is deprecated.) See :ref:`Documentation/networking/devlink/devlink-port.rst ` for the details of this API. -- cgit v1.2.3 From 2c0d808f36cc6e0617f9dda055a6651c777a9d64 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Thu, 12 Oct 2023 12:16:26 +0530 Subject: net: ti: icssg-prueth: Fix tx_total_bytes count ICSSG HW stats on TX side considers 8 preamble bytes as data bytes. Due to this the tx_bytes of ICSSG interface doesn't match the rx_bytes of the link partner. There is no public errata available yet. As a workaround to fix this, decrease tx_bytes by 8 bytes for every tx frame. Fixes: c1e10d5dc7a1 ("net: ti: icssg-prueth: Add ICSSG Stats") Signed-off-by: MD Danish Anwar Link: https://lore.kernel.org/r/20231012064626.977466-1-danishanwar@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_stats.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/ti/icssg/icssg_stats.c b/drivers/net/ethernet/ti/icssg/icssg_stats.c index bb0b33927e3b..3dbadddd7e35 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_stats.c +++ b/drivers/net/ethernet/ti/icssg/icssg_stats.c @@ -9,6 +9,9 @@ #include "icssg_stats.h" #include +#define ICSSG_TX_PACKET_OFFSET 0xA0 +#define ICSSG_TX_BYTE_OFFSET 0xEC + static u32 stats_base[] = { 0x54c, /* Slice 0 stats start */ 0xb18, /* Slice 1 stats start */ }; @@ -18,6 +21,7 @@ void emac_update_hardware_stats(struct prueth_emac *emac) struct prueth *prueth = emac->prueth; int slice = prueth_emac_slice(emac); u32 base = stats_base[slice]; + u32 tx_pkt_cnt = 0; u32 val; int i; @@ -29,7 +33,12 @@ void emac_update_hardware_stats(struct prueth_emac *emac) base + icssg_all_stats[i].offset, val); + if (icssg_all_stats[i].offset == ICSSG_TX_PACKET_OFFSET) + tx_pkt_cnt = val; + emac->stats[i] += val; + if (icssg_all_stats[i].offset == ICSSG_TX_BYTE_OFFSET) + emac->stats[i] -= tx_pkt_cnt * 8; } } -- cgit v1.2.3 From fc6f716a5069180c40a8c9b63631e97da34f64a3 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Wed, 11 Oct 2023 16:33:32 -0700 Subject: i40e: prevent crash on probe if hw registers have invalid values The hardware provides the indexes of the first and the last available queue and VF. From the indexes, the driver calculates the numbers of queues and VFs. In theory, a faulty device might say the last index is smaller than the first index. In that case, the driver's calculation would underflow, it would attempt to write to non-existent registers outside of the ioremapped range and crash. I ran into this not by having a faulty device, but by an operator error. I accidentally ran a QE test meant for i40e devices on an ice device. The test used 'echo i40e > /sys/...ice PCI device.../driver_override', bound the driver to the device and crashed in one of the wr32 calls in i40e_clear_hw. Add checks to prevent underflows in the calculations of num_queues and num_vfs. With this fix, the wrong device probing reports errors and returns a failure without crashing. Fixes: 838d41d92a90 ("i40e: clear all queues and interrupts") Signed-off-by: Michal Schmidt Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Link: https://lore.kernel.org/r/20231011233334.336092-2-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index eeef20f77106..1b493854f522 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1082,7 +1082,7 @@ void i40e_clear_hw(struct i40e_hw *hw) I40E_PFLAN_QALLOC_FIRSTQ_SHIFT; j = (val & I40E_PFLAN_QALLOC_LASTQ_MASK) >> I40E_PFLAN_QALLOC_LASTQ_SHIFT; - if (val & I40E_PFLAN_QALLOC_VALID_MASK) + if (val & I40E_PFLAN_QALLOC_VALID_MASK && j >= base_queue) num_queues = (j - base_queue) + 1; else num_queues = 0; @@ -1092,7 +1092,7 @@ void i40e_clear_hw(struct i40e_hw *hw) I40E_PF_VT_PFALLOC_FIRSTVF_SHIFT; j = (val & I40E_PF_VT_PFALLOC_LASTVF_MASK) >> I40E_PF_VT_PFALLOC_LASTVF_SHIFT; - if (val & I40E_PF_VT_PFALLOC_VALID_MASK) + if (val & I40E_PF_VT_PFALLOC_VALID_MASK && j >= i) num_vfs = (j - i) + 1; else num_vfs = 0; -- cgit v1.2.3 From 0288c3e709e5fabd51e84715c5c798a02f43061a Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 11 Oct 2023 16:33:33 -0700 Subject: ice: reset first in crash dump kernels When the system boots into the crash dump kernel after a panic, the ice networking device may still have pending transactions that can cause errors or machine checks when the device is re-enabled. This can prevent the crash dump kernel from loading the driver or collecting the crash data. To avoid this issue, perform a function level reset (FLR) on the ice device via PCIe config space before enabling it on the crash kernel. This will clear any outstanding transactions and stop all queues and interrupts. Restore the config space after the FLR, otherwise it was found in testing that the driver wouldn't load successfully. The following sequence causes the original issue: - Load the ice driver with modprobe ice - Enable SR-IOV with 2 VFs: echo 2 > /sys/class/net/eth0/device/sriov_num_vfs - Trigger a crash with echo c > /proc/sysrq-trigger - Load the ice driver again (or let it load automatically) with modprobe ice - The system crashes again during pcim_enable_device() Fixes: 837f08fdecbe ("ice: Add basic driver framework for Intel(R) E800 Series") Reported-by: Vishal Agrawal Reviewed-by: Jay Vosburgh Reviewed-by: Przemek Kitszel Signed-off-by: Jesse Brandeburg Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Link: https://lore.kernel.org/r/20231011233334.336092-3-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c8286adae946..6550c46e4e36 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include "ice.h" #include "ice_base.h" #include "ice_lib.h" @@ -5014,6 +5015,20 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) return -EINVAL; } + /* when under a kdump kernel initiate a reset before enabling the + * device in order to clear out any pending DMA transactions. These + * transactions can cause some systems to machine check when doing + * the pcim_enable_device() below. + */ + if (is_kdump_kernel()) { + pci_save_state(pdev); + pci_clear_master(pdev); + err = pcie_flr(pdev); + if (err) + return err; + pci_restore_state(pdev); + } + /* this driver uses devres, see * Documentation/driver-api/driver-model/devres.rst */ -- cgit v1.2.3 From 42066c4d5d344cdf8564556cdbe0aa36854fefa4 Mon Sep 17 00:00:00 2001 From: Mateusz Pacuszka Date: Wed, 11 Oct 2023 16:33:34 -0700 Subject: ice: Fix safe mode when DDP is missing One thing is broken in the safe mode, that is ice_deinit_features() is being executed even that ice_init_features() was not causing stack trace during pci_unregister_driver(). Add check on the top of the function. Fixes: 5b246e533d01 ("ice: split probe into smaller functions") Signed-off-by: Mateusz Pacuszka Signed-off-by: Jan Sokolowski Reviewed-by: Przemek Kitszel Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Link: https://lore.kernel.org/r/20231011233334.336092-4-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 6550c46e4e36..7784135160fd 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4684,6 +4684,9 @@ static void ice_init_features(struct ice_pf *pf) static void ice_deinit_features(struct ice_pf *pf) { + if (ice_is_safe_mode(pf)) + return; + ice_deinit_lag(pf); if (test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)) ice_cfg_lldp_mib_change(&pf->hw, false); -- cgit v1.2.3 From e2cb5cc822b6c9ee72c56ce1d81671b22c05406a Mon Sep 17 00:00:00 2001 From: Jeffery Miller Date: Fri, 13 Oct 2023 15:23:49 -0700 Subject: Input: psmouse - fix fast_reconnect function for PS/2 mode When the SMBus connection is attempted psmouse_smbus_init() sets the fast_reconnect pointer to psmouse_smbus_reconnecti(). If SMBus initialization fails, elantech_setup_ps2() and synaptics_init_ps2() will fallback to PS/2 mode, replacing the psmouse private data. This can cause issues on resume, since psmouse_smbus_reconnect() expects to find an instance of struct psmouse_smbus_dev in psmouse->private. The issue was uncovered when in 92e24e0e57f7 ("Input: psmouse - add delay when deactivating for SMBus mode") psmouse_smbus_reconnect() started attempting to use more of the data structure. The commit was since reverted, not because it was at fault, but because there was found a better way of doing what it was attempting to do. Fix the problem by resetting the fast_reconnect pointer in psmouse structure in elantech_setup_ps2() and synaptics_init_ps2() when the PS/2 mode is used. Reported-by: Thorsten Leemhuis Tested-by: Thorsten Leemhuis Signed-off-by: Jeffery Miller Fixes: bf232e460a35 ("Input: psmouse-smbus - allow to control psmouse_deactivate") Link: https://lore.kernel.org/r/20231005002249.554877-1-jefferymiller@google.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 1 + drivers/input/mouse/synaptics.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 2118b2075f43..4e38229404b4 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -2114,6 +2114,7 @@ static int elantech_setup_ps2(struct psmouse *psmouse, psmouse->protocol_handler = elantech_process_byte; psmouse->disconnect = elantech_disconnect; psmouse->reconnect = elantech_reconnect; + psmouse->fast_reconnect = NULL; psmouse->pktsize = info->hw_version > 1 ? 6 : 4; return 0; diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index ada299ec5bba..cefc74b3b34b 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1623,6 +1623,7 @@ static int synaptics_init_ps2(struct psmouse *psmouse, psmouse->set_rate = synaptics_set_rate; psmouse->disconnect = synaptics_disconnect; psmouse->reconnect = synaptics_reconnect; + psmouse->fast_reconnect = NULL; psmouse->cleanup = synaptics_reset; /* Synaptics can usually stay in sync without extra help */ psmouse->resync_time = 0; -- cgit v1.2.3 From 5030b2fe6aab37fe42d14f31842ea38be7c55c57 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 13 Oct 2023 17:29:57 -0700 Subject: Input: synaptics-rmi4 - handle reset delay when using SMBus trsnsport Touch controllers need some time after receiving reset command for the firmware to finish re-initializing and be ready to respond to commands from the host. The driver already had handling for the post-reset delay for I2C and SPI transports, this change adds the handling to SMBus-connected devices. SMBus devices are peculiar because they implement legacy PS/2 compatibility mode, so reset is actually issued by psmouse driver on the associated serio port, after which the control is passed to the RMI4 driver with SMBus companion device. Note that originally the delay was added to psmouse driver in 92e24e0e57f7 ("Input: psmouse - add delay when deactivating for SMBus mode"), but that resulted in an unwanted delay in "fast" reconnect handler for the serio port, so it was decided to revert the patch and have the delay being handled in the RMI4 driver, similar to the other transports. Tested-by: Jeffery Miller Link: https://lore.kernel.org/r/ZR1yUFJ8a9Zt606N@penguin Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + drivers/input/rmi4/rmi_smbus.c | 50 +++++++++++++++++++++++------------------ 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index cefc74b3b34b..22d16d80efb9 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1753,6 +1753,7 @@ static int synaptics_create_intertouch(struct psmouse *psmouse, psmouse_matches_pnp_id(psmouse, topbuttonpad_pnp_ids) && !SYN_CAP_EXT_BUTTONS_STICK(info->ext_cap_10); const struct rmi_device_platform_data pdata = { + .reset_delay_ms = 30, .sensor_pdata = { .sensor_type = rmi_sensor_touchpad, .axis_align.flip_y = true, diff --git a/drivers/input/rmi4/rmi_smbus.c b/drivers/input/rmi4/rmi_smbus.c index 7059a2762aeb..b0b099b5528a 100644 --- a/drivers/input/rmi4/rmi_smbus.c +++ b/drivers/input/rmi4/rmi_smbus.c @@ -235,12 +235,29 @@ static void rmi_smb_clear_state(struct rmi_smb_xport *rmi_smb) static int rmi_smb_enable_smbus_mode(struct rmi_smb_xport *rmi_smb) { - int retval; + struct i2c_client *client = rmi_smb->client; + int smbus_version; + + /* + * psmouse driver resets the controller, we only need to wait + * to give the firmware chance to fully reinitialize. + */ + if (rmi_smb->xport.pdata.reset_delay_ms) + msleep(rmi_smb->xport.pdata.reset_delay_ms); /* we need to get the smbus version to activate the touchpad */ - retval = rmi_smb_get_version(rmi_smb); - if (retval < 0) - return retval; + smbus_version = rmi_smb_get_version(rmi_smb); + if (smbus_version < 0) + return smbus_version; + + rmi_dbg(RMI_DEBUG_XPORT, &client->dev, "Smbus version is %d", + smbus_version); + + if (smbus_version != 2 && smbus_version != 3) { + dev_err(&client->dev, "Unrecognized SMB version %d\n", + smbus_version); + return -ENODEV; + } return 0; } @@ -253,11 +270,10 @@ static int rmi_smb_reset(struct rmi_transport_dev *xport, u16 reset_addr) rmi_smb_clear_state(rmi_smb); /* - * we do not call the actual reset command, it has to be handled in - * PS/2 or there will be races between PS/2 and SMBus. - * PS/2 should ensure that a psmouse_reset is called before - * intializing the device and after it has been removed to be in a known - * state. + * We do not call the actual reset command, it has to be handled in + * PS/2 or there will be races between PS/2 and SMBus. PS/2 should + * ensure that a psmouse_reset is called before initializing the + * device and after it has been removed to be in a known state. */ return rmi_smb_enable_smbus_mode(rmi_smb); } @@ -272,7 +288,6 @@ static int rmi_smb_probe(struct i2c_client *client) { struct rmi_device_platform_data *pdata = dev_get_platdata(&client->dev); struct rmi_smb_xport *rmi_smb; - int smbus_version; int error; if (!pdata) { @@ -311,18 +326,9 @@ static int rmi_smb_probe(struct i2c_client *client) rmi_smb->xport.proto_name = "smb"; rmi_smb->xport.ops = &rmi_smb_ops; - smbus_version = rmi_smb_get_version(rmi_smb); - if (smbus_version < 0) - return smbus_version; - - rmi_dbg(RMI_DEBUG_XPORT, &client->dev, "Smbus version is %d", - smbus_version); - - if (smbus_version != 2 && smbus_version != 3) { - dev_err(&client->dev, "Unrecognized SMB version %d\n", - smbus_version); - return -ENODEV; - } + error = rmi_smb_enable_smbus_mode(rmi_smb); + if (error) + return error; i2c_set_clientdata(client, rmi_smb); -- cgit v1.2.3 From a65cd7ef5a864bdbbe037267c327786b7759d4c6 Mon Sep 17 00:00:00 2001 From: Matthias Berndt Date: Fri, 13 Oct 2023 15:04:36 -0700 Subject: Input: xpad - add PXN V900 support Add VID and PID to the xpad_device table to allow driver to use the PXN V900 steering wheel, which is XTYPE_XBOX360 compatible in xinput mode. Signed-off-by: Matthias Berndt Link: https://lore.kernel.org/r/4932699.31r3eYUQgx@fedora Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index b22f9c6e7fa5..f5c21565bb3c 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -273,6 +273,7 @@ static const struct xpad_device { { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 }, + { 0x11ff, 0x0511, "PXN V900", 0, XTYPE_XBOX360 }, { 0x1209, 0x2882, "Ardwiino Controller", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x12ab, 0x0301, "PDP AFTERGLOW AX.1", 0, XTYPE_XBOX360 }, @@ -479,6 +480,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x1038), /* SteelSeries controllers */ XPAD_XBOXONE_VENDOR(0x10f5), /* Turtle Beach Controllers */ XPAD_XBOX360_VENDOR(0x11c9), /* Nacon GC100XF */ + XPAD_XBOX360_VENDOR(0x11ff), /* PXN V900 */ XPAD_XBOX360_VENDOR(0x1209), /* Ardwiino Controllers */ XPAD_XBOX360_VENDOR(0x12ab), /* Xbox 360 dance pads */ XPAD_XBOX360_VENDOR(0x1430), /* RedOctane Xbox 360 controllers */ -- cgit v1.2.3 From b541260615f601ae1b5d6d0cc54e790de706303b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 5 Oct 2023 13:59:59 -0700 Subject: Bluetooth: hci_event: Fix using memcmp when comparing keys memcmp is not consider safe to use with cryptographic secrets: 'Do not use memcmp() to compare security critical data, such as cryptographic secrets, because the required CPU time depends on the number of equal bytes.' While usage of memcmp for ZERO_KEY may not be considered a security critical data, it can lead to more usage of memcmp with pairing keys which could introduce more security problems. Fixes: 455c2ff0a558 ("Bluetooth: Fix BR/EDR out-of-band pairing with only initiator data") Fixes: 33155c4aae52 ("Bluetooth: hci_event: Ignore NULL link key") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7e1c875e0e88..9b252a65a7cd 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -26,6 +26,8 @@ /* Bluetooth HCI event handling. */ #include +#include +#include #include #include @@ -4754,7 +4756,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, void *data, goto unlock; /* Ignore NULL link key against CVE-2020-26555 */ - if (!memcmp(ev->link_key, ZERO_KEY, HCI_LINK_KEY_SIZE)) { + if (!crypto_memneq(ev->link_key, ZERO_KEY, HCI_LINK_KEY_SIZE)) { bt_dev_dbg(hdev, "Ignore NULL link key (ZERO KEY) for %pMR", &ev->bdaddr); hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); @@ -5294,8 +5296,8 @@ static u8 bredr_oob_data_present(struct hci_conn *conn) * available, then do not declare that OOB data is * present. */ - if (!memcmp(data->rand256, ZERO_KEY, 16) || - !memcmp(data->hash256, ZERO_KEY, 16)) + if (!crypto_memneq(data->rand256, ZERO_KEY, 16) || + !crypto_memneq(data->hash256, ZERO_KEY, 16)) return 0x00; return 0x02; @@ -5305,8 +5307,8 @@ static u8 bredr_oob_data_present(struct hci_conn *conn) * not supported by the hardware, then check that if * P-192 data values are present. */ - if (!memcmp(data->rand192, ZERO_KEY, 16) || - !memcmp(data->hash192, ZERO_KEY, 16)) + if (!crypto_memneq(data->rand192, ZERO_KEY, 16) || + !crypto_memneq(data->hash192, ZERO_KEY, 16)) return 0x00; return 0x01; -- cgit v1.2.3 From 35d91d95a0cd61ebb90e0246dc917fd25e519b8c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 5 Oct 2023 14:12:19 -0700 Subject: Bluetooth: hci_event: Fix coding style This fixes the following code style problem: ERROR: that open brace { should be on the previous line + if (!bacmp(&hdev->bdaddr, &ev->bdaddr)) + { Fixes: 1ffc6f8cc332 ("Bluetooth: Reject connection with the device which has same BD_ADDR") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 9b252a65a7cd..7cb41ac1c258 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3273,8 +3273,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, /* Reject incoming connection from device with same BD ADDR against * CVE-2020-26555 */ - if (!bacmp(&hdev->bdaddr, &ev->bdaddr)) - { + if (!bacmp(&hdev->bdaddr, &ev->bdaddr)) { bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n", &ev->bdaddr); hci_reject_conn(hdev, &ev->bdaddr); -- cgit v1.2.3 From 9ee252868787ab5a26012d69e335c7371f54563a Mon Sep 17 00:00:00 2001 From: Max Chou Date: Fri, 6 Oct 2023 10:47:07 +0800 Subject: Bluetooth: btrtl: Ignore error return for hci_devcd_register() If CONFIG_DEV_COREDUMP was not set, it would return -EOPNOTSUPP for hci_devcd_register(). In this commit, ignore error return for hci_devcd_register(). Otherwise Bluetooth initialization will be failed. Fixes: 044014ce85a1 ("Bluetooth: btrtl: Add Realtek devcoredump support") Cc: stable@vger.kernel.org Reported-by: Kirill A. Shutemov Closes: https://lore.kernel.org/all/ZRyqIn0_qqEFBPdy@debian.me/T/ Signed-off-by: Max Chou Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btrtl.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 84c2c2e1122f..277d039ecbb4 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -962,13 +962,10 @@ static void btrtl_dmp_hdr(struct hci_dev *hdev, struct sk_buff *skb) skb_put_data(skb, buf, strlen(buf)); } -static int btrtl_register_devcoredump_support(struct hci_dev *hdev) +static void btrtl_register_devcoredump_support(struct hci_dev *hdev) { - int err; + hci_devcd_register(hdev, btrtl_coredump, btrtl_dmp_hdr, NULL); - err = hci_devcd_register(hdev, btrtl_coredump, btrtl_dmp_hdr, NULL); - - return err; } void btrtl_set_driver_name(struct hci_dev *hdev, const char *driver_name) @@ -1255,8 +1252,7 @@ int btrtl_download_firmware(struct hci_dev *hdev, } done: - if (!err) - err = btrtl_register_devcoredump_support(hdev); + btrtl_register_devcoredump_support(hdev); return err; } -- cgit v1.2.3 From 18f547f3fc074500ab5d419cf482240324e73a7e Mon Sep 17 00:00:00 2001 From: Edward AD Date: Tue, 10 Oct 2023 13:36:57 +0800 Subject: Bluetooth: hci_sock: fix slab oob read in create_monitor_event When accessing hdev->name, the actual string length should prevail Reported-by: syzbot+c90849c50ed209d77689@syzkaller.appspotmail.com Fixes: dcda165706b9 ("Bluetooth: hci_core: Fix build warnings") Signed-off-by: Edward AD Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 5e4f718073b7..72abe54c45dd 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -488,7 +488,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) ni->type = hdev->dev_type; ni->bus = hdev->bus; bacpy(&ni->bdaddr, &hdev->bdaddr); - memcpy(ni->name, hdev->name, 8); + memcpy(ni->name, hdev->name, strlen(hdev->name)); opcode = cpu_to_le16(HCI_MON_NEW_INDEX); break; -- cgit v1.2.3 From 9d1a3c74746428102d55371fbf74b484733937d9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 9 Oct 2023 22:31:31 +0200 Subject: Bluetooth: avoid memcmp() out of bounds warning bacmp() is a wrapper around memcpy(), which contain compile-time checks for buffer overflow. Since the hci_conn_request_evt() also calls bt_dev_dbg() with an implicit NULL pointer check, the compiler is now aware of a case where 'hdev' is NULL and treats this as meaning that zero bytes are available: In file included from net/bluetooth/hci_event.c:32: In function 'bacmp', inlined from 'hci_conn_request_evt' at net/bluetooth/hci_event.c:3276:7: include/net/bluetooth/bluetooth.h:364:16: error: 'memcmp' specified bound 6 exceeds source size 0 [-Werror=stringop-overread] 364 | return memcmp(ba1, ba2, sizeof(bdaddr_t)); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Add another NULL pointer check before the bacmp() to ensure the compiler understands the code flow enough to not warn about it. Since the patch that introduced the warning is marked for stable backports, this one should also go that way to avoid introducing build regressions. Fixes: 1ffc6f8cc332 ("Bluetooth: Reject connection with the device which has same BD_ADDR") Cc: Kees Cook Cc: "Lee, Chun-Yi" Cc: Luiz Augusto von Dentz Cc: Marcel Holtmann Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Kees Cook Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7cb41ac1c258..1e1c9147356c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3273,7 +3273,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, /* Reject incoming connection from device with same BD ADDR against * CVE-2020-26555 */ - if (!bacmp(&hdev->bdaddr, &ev->bdaddr)) { + if (hdev && !bacmp(&hdev->bdaddr, &ev->bdaddr)) { bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n", &ev->bdaddr); hci_reject_conn(hdev, &ev->bdaddr); -- cgit v1.2.3 From cb3871b1cd135a6662b732fbc6b3db4afcdb4a64 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 11 Oct 2023 09:31:44 -0700 Subject: Bluetooth: hci_sock: Correctly bounds check and pad HCI_MON_NEW_INDEX name The code pattern of memcpy(dst, src, strlen(src)) is almost always wrong. In this case it is wrong because it leaves memory uninitialized if it is less than sizeof(ni->name), and overflows ni->name when longer. Normally strtomem_pad() could be used here, but since ni->name is a trailing array in struct hci_mon_new_index, compilers that don't support -fstrict-flex-arrays=3 can't tell how large this array is via __builtin_object_size(). Instead, open-code the helper and use sizeof() since it will work correctly. Additionally mark ni->name as __nonstring since it appears to not be a %NUL terminated C string. Cc: Luiz Augusto von Dentz Cc: Edward AD Cc: Marcel Holtmann Cc: Johan Hedberg Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: linux-bluetooth@vger.kernel.org Cc: netdev@vger.kernel.org Fixes: 18f547f3fc07 ("Bluetooth: hci_sock: fix slab oob read in create_monitor_event") Link: https://lore.kernel.org/lkml/202310110908.F2639D3276@keescook/ Signed-off-by: Kees Cook Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_mon.h | 2 +- net/bluetooth/hci_sock.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 2d5fcda1bcd0..082f89531b88 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -56,7 +56,7 @@ struct hci_mon_new_index { __u8 type; __u8 bus; bdaddr_t bdaddr; - char name[8]; + char name[8] __nonstring; } __packed; #define HCI_MON_NEW_INDEX_SIZE 16 diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 72abe54c45dd..3e7cd330d731 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -488,7 +488,8 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) ni->type = hdev->dev_type; ni->bus = hdev->bus; bacpy(&ni->bdaddr, &hdev->bdaddr); - memcpy(ni->name, hdev->name, strlen(hdev->name)); + memcpy_and_pad(ni->name, sizeof(ni->name), hdev->name, + strnlen(hdev->name, sizeof(ni->name)), '\0'); opcode = cpu_to_le16(HCI_MON_NEW_INDEX); break; -- cgit v1.2.3 From 5c15c60e7be615f05a45cd905093a54b11f461bc Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Fri, 13 Oct 2023 20:11:33 -0700 Subject: Input: powermate - fix use-after-free in powermate_config_complete syzbot has found a use-after-free bug [1] in the powermate driver. This happens when the device is disconnected, which leads to a memory free from the powermate_device struct. When an asynchronous control message completes after the kfree and its callback is invoked, the lock does not exist anymore and hence the bug. Use usb_kill_urb() on pm->config to cancel any in-progress requests upon device disconnection. [1] https://syzkaller.appspot.com/bug?extid=0434ac83f907a1dbdd1e Signed-off-by: Javier Carrasco Reported-by: syzbot+0434ac83f907a1dbdd1e@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20230916-topic-powermate_use_after_free-v3-1-64412b81a7a2@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/powermate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/misc/powermate.c b/drivers/input/misc/powermate.c index c1c733a9cb89..db2ba89adaef 100644 --- a/drivers/input/misc/powermate.c +++ b/drivers/input/misc/powermate.c @@ -425,6 +425,7 @@ static void powermate_disconnect(struct usb_interface *intf) pm->requires_update = 0; usb_kill_urb(pm->irq); input_unregister_device(pm->input); + usb_kill_urb(pm->config); usb_free_urb(pm->irq); usb_free_urb(pm->config); powermate_free_buffers(interface_to_usbdev(intf), pm); -- cgit v1.2.3 From 32db510708507f6133f496ff385cbd841d8f9098 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 11 Oct 2023 17:07:03 +0300 Subject: ovl: fix regression in showing lowerdir mount option Before commit b36a5780cb44 ("ovl: modify layer parameter parsing"), spaces and commas in lowerdir mount option value used to be escaped using seq_show_option(). In current upstream, when lowerdir value has a space, it is not escaped in /proc/mounts, e.g.: none /mnt overlay rw,relatime,lowerdir=l l,upperdir=u,workdir=w 0 0 which results in broken output of the mount utility: none on /mnt type overlay (rw,relatime,lowerdir=l) Store the original lowerdir mount options before unescaping and show them using the same escaping used for seq_show_option() in addition to escaping the colon separator character. Fixes: b36a5780cb44 ("ovl: modify layer parameter parsing") Signed-off-by: Amir Goldstein --- Documentation/filesystems/overlayfs.rst | 12 +++++++++++ fs/overlayfs/params.c | 38 ++++++++++++++++++++------------- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index cdefbe73d85c..5b93268e400f 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -339,6 +339,18 @@ The specified lower directories will be stacked beginning from the rightmost one and going left. In the above example lower1 will be the top, lower2 the middle and lower3 the bottom layer. +Note: directory names containing colons can be provided as lower layer by +escaping the colons with a single backslash. For example: + + mount -t overlay overlay -olowerdir=/a\:lower\:\:dir /merged + +Since kernel version v6.5, directory names containing colons can also +be provided as lower layer using the fsconfig syscall from new mount api: + + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir", "/a:lower::dir", 0); + +In the latter case, colons in lower layer directory names will be escaped +as an octal characters (\072) when displayed in /proc/self/mountinfo. Metadata only copy up --------------------- diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index 17c74ef4f089..8b6bae320e8a 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -192,7 +192,8 @@ static ssize_t ovl_parse_param_split_lowerdirs(char *str) for (s = d = str;; s++, d++) { if (*s == '\\') { - s++; + /* keep esc chars in split lowerdir */ + *d++ = *s++; } else if (*s == ':') { bool next_colon = (*(s + 1) == ':'); @@ -267,7 +268,7 @@ static void ovl_unescape(char *s) } } -static int ovl_mount_dir(const char *name, struct path *path) +static int ovl_mount_dir(const char *name, struct path *path, bool upper) { int err = -ENOMEM; char *tmp = kstrdup(name, GFP_KERNEL); @@ -276,7 +277,7 @@ static int ovl_mount_dir(const char *name, struct path *path) ovl_unescape(tmp); err = ovl_mount_dir_noesc(tmp, path); - if (!err && path->dentry->d_flags & DCACHE_OP_REAL) { + if (!err && upper && path->dentry->d_flags & DCACHE_OP_REAL) { pr_err("filesystem on '%s' not supported as upperdir\n", tmp); path_put_init(path); @@ -297,7 +298,7 @@ static int ovl_parse_param_upperdir(const char *name, struct fs_context *fc, struct path path; char *dup; - err = ovl_mount_dir(name, &path); + err = ovl_mount_dir(name, &path, true); if (err) return err; @@ -500,7 +501,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) l = &ctx->lower[nr]; memset(l, 0, sizeof(*l)); - err = ovl_mount_dir_noesc(dup_iter, &l->path); + err = ovl_mount_dir(dup_iter, &l->path, false); if (err) goto out_put; @@ -979,16 +980,23 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry) struct super_block *sb = dentry->d_sb; struct ovl_fs *ofs = OVL_FS(sb); size_t nr, nr_merged_lower = ofs->numlayer - ofs->numdatalayer; - char **lowerdatadirs = &ofs->config.lowerdirs[nr_merged_lower]; - - /* lowerdirs[] starts from offset 1 */ - seq_printf(m, ",lowerdir=%s", ofs->config.lowerdirs[1]); - /* dump regular lower layers */ - for (nr = 2; nr < nr_merged_lower; nr++) - seq_printf(m, ":%s", ofs->config.lowerdirs[nr]); - /* dump data lower layers */ - for (nr = 0; nr < ofs->numdatalayer; nr++) - seq_printf(m, "::%s", lowerdatadirs[nr]); + + /* + * lowerdirs[] starts from offset 1, then + * >= 0 regular lower layers prefixed with : and + * >= 0 data-only lower layers prefixed with :: + * + * we need to escase comma and space like seq_show_option() does and + * we also need to escape the colon separator from lowerdir paths. + */ + seq_puts(m, ",lowerdir="); + for (nr = 1; nr < ofs->numlayer; nr++) { + if (nr > 1) + seq_putc(m, ':'); + if (nr >= nr_merged_lower) + seq_putc(m, ':'); + seq_escape(m, ofs->config.lowerdirs[nr], ":, \t\n\\"); + } if (ofs->config.upperdir) { seq_show_option(m, "upperdir", ofs->config.upperdir); seq_show_option(m, "workdir", ofs->config.workdir); -- cgit v1.2.3 From beae836e9c61ee039e367a94b14f7fea08f0ad4c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 14 Oct 2023 22:30:04 +0300 Subject: ovl: temporarily disable appending lowedirs Kernel v6.5 converted overlayfs to new mount api. As an added bonus, it also added a feature to allow appending lowerdirs using lowerdir=:/lower2,lowerdir=::/data3 syntax. This new syntax has raised some concerns regarding escaping of colons. We decided to try and disable this syntax, which hasn't been in the wild for so long and introduce it again in 6.7 using explicit mount options lowerdir+=/lower2,datadir+=/data3. Suggested-by: Miklos Szeredi Link: https://lore.kernel.org/r/CAJfpegsr3A4YgF2YBevWa6n3=AcP7hNndG6EPMu3ncvV-AM71A@mail.gmail.com/ Fixes: b36a5780cb44 ("ovl: modify layer parameter parsing") Signed-off-by: Amir Goldstein --- fs/overlayfs/params.c | 52 +++------------------------------------------------ 1 file changed, 3 insertions(+), 49 deletions(-) diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index 8b6bae320e8a..f6ff23fd101c 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -350,12 +350,6 @@ static void ovl_parse_param_drop_lowerdir(struct ovl_fs_context *ctx) * Set "/lower1", "/lower2", and "/lower3" as lower layers and * "/data1" and "/data2" as data lower layers. Any existing lower * layers are replaced. - * (2) lowerdir=:/lower4 - * Append "/lower4" to current stack of lower layers. This requires - * that there already is at least one lower layer configured. - * (3) lowerdir=::/lower5 - * Append data "/lower5" as data lower layer. This requires that - * there's at least one regular lower layer present. */ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) { @@ -377,49 +371,9 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) return 0; } - if (strncmp(name, "::", 2) == 0) { - /* - * This is a data layer. - * There must be at least one regular lower layer - * specified. - */ - if (ctx->nr == 0) { - pr_err("data lower layers without regular lower layers not allowed"); - return -EINVAL; - } - - /* Skip the leading "::". */ - name += 2; - data_layer = true; - /* - * A data layer is automatically an append as there - * must've been at least one regular lower layer. - */ - append = true; - } else if (*name == ':') { - /* - * This is a regular lower layer. - * If users want to append a layer enforce that they - * have already specified a first layer before. It's - * better to be strict. - */ - if (ctx->nr == 0) { - pr_err("cannot append layer if no previous layer has been specified"); - return -EINVAL; - } - - /* - * Once a sequence of data layers has started regular - * lower layers are forbidden. - */ - if (ctx->nr_data > 0) { - pr_err("regular lower layers cannot follow data lower layers"); - return -EINVAL; - } - - /* Skip the leading ":". */ - name++; - append = true; + if (*name == ':') { + pr_err("cannot append lower layer"); + return -EINVAL; } dup = kstrdup(name, GFP_KERNEL); -- cgit v1.2.3 From 2f3389c73832ad90b63208c0fc281ad080114c7a Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Fri, 13 Oct 2023 18:48:12 +0530 Subject: qed: fix LL2 RX buffer allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Driver allocates the LL2 rx buffers from kmalloc() area to construct the skb using slab_build_skb() The required size allocation seems to have overlooked for accounting both skb_shared_info size and device placement padding bytes which results into the below panic when doing skb_put() for a standard MTU sized frame. skbuff: skb_over_panic: text:ffffffffc0b0225f len:1514 put:1514 head:ff3dabceaf39c000 data:ff3dabceaf39c042 tail:0x62c end:0x566 dev: … skb_panic+0x48/0x4a skb_put.cold+0x10/0x10 qed_ll2b_complete_rx_packet+0x14f/0x260 [qed] qed_ll2_rxq_handle_completion.constprop.0+0x169/0x200 [qed] qed_ll2_rxq_completion+0xba/0x320 [qed] qed_int_sp_dpc+0x1a7/0x1e0 [qed] This patch fixes this by accouting skb_shared_info and device placement padding size bytes when allocating the buffers. Cc: David S. Miller Fixes: 0a7fb11c23c0 ("qed: Add Light L2 support") Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 717a0b3f89bd..ab5ef254a748 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -113,7 +113,10 @@ static void qed_ll2b_complete_tx_packet(void *cxt, static int qed_ll2_alloc_buffer(struct qed_dev *cdev, u8 **data, dma_addr_t *phys_addr) { - *data = kmalloc(cdev->ll2->rx_size, GFP_ATOMIC); + size_t size = cdev->ll2->rx_size + NET_SKB_PAD + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + *data = kmalloc(size, GFP_ATOMIC); if (!(*data)) { DP_INFO(cdev, "Failed to allocate LL2 buffer data\n"); return -ENOMEM; @@ -2589,7 +2592,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) INIT_LIST_HEAD(&cdev->ll2->list); spin_lock_init(&cdev->ll2->lock); - cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN + + cdev->ll2->rx_size = PRM_DMA_PAD_BYTES_NUM + ETH_HLEN + L1_CACHE_BYTES + params->mtu; /* Allocate memory for LL2. -- cgit v1.2.3 From 8a540e990d7da36813cb71a4a422712bfba448a4 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sat, 7 Oct 2023 01:14:21 -0400 Subject: btrfs: fix stripe length calculation for non-zoned data chunk allocation Commit f6fca3917b4d "btrfs: store chunk size in space-info struct" broke data chunk allocations on non-zoned multi-device filesystems when using default chunk_size. Commit 5da431b71d4b "btrfs: fix the max chunk size and stripe length calculation" partially fixed that, and this patch completes the fix for that case. After commit f6fca3917b4d and 5da431b71d4b, the sequence of events for a data chunk allocation on a non-zoned filesystem is: 1. btrfs_create_chunk calls init_alloc_chunk_ctl, which copies space_info->chunk_size (default 10 GiB) to ctl->max_stripe_len unmodified. Before f6fca3917b4d, ctl->max_stripe_len value was 1 GiB for non-zoned data chunks and not configurable. 2. btrfs_create_chunk calls gather_device_info which consumes and produces more fields of chunk_ctl. 3. gather_device_info multiplies ctl->max_stripe_len by ctl->dev_stripes (which is 1 in all cases except dup) and calls find_free_dev_extent with that number as num_bytes. 4. find_free_dev_extent locates the first dev_extent hole on a device which is at least as large as num_bytes. With default max_chunk_size from f6fca3917b4d, it finds the first hole which is longer than 10 GiB, or the largest hole if that hole is shorter than 10 GiB. This is different from the pre-f6fca3917b4d behavior, where num_bytes is 1 GiB, and find_free_dev_extent may choose a different hole. 5. gather_device_info repeats step 4 with all devices to find the first or largest dev_extent hole that can be allocated on each device. 6. gather_device_info sorts the device list by the hole size on each device, using total unallocated space on each device to break ties, then returns to btrfs_create_chunk with the list. 7. btrfs_create_chunk calls decide_stripe_size_regular. 8. decide_stripe_size_regular finds the largest stripe_len that fits across the first nr_devs device dev_extent holes that were found by gather_device_info (and satisfies other constraints on stripe_len that are not relevant here). 9. decide_stripe_size_regular caps the length of the stripe it computed at 1 GiB. This cap appeared in 5da431b71d4b to correct one of the other regressions introduced in f6fca3917b4d. 10. btrfs_create_chunk creates a new chunk with the above computed size and number of devices. At step 4, gather_device_info() has found a location where stripe up to 10 GiB in length could be allocated on several devices, and selected which devices should have a dev_extent allocated on them, but at step 9, only 1 GiB of the space that was found on each device can be used. This mismatch causes new suboptimal chunk allocation cases that did not occur in pre-f6fca3917b4d kernels. Consider a filesystem using raid1 profile with 3 devices. After some balances, device 1 has 10x 1 GiB unallocated space, while devices 2 and 3 have 1x 10 GiB unallocated space, i.e. the same total amount of space, but distributed across different numbers of dev_extent holes. For visualization, let's ignore all the chunks that were allocated before this point, and focus on the remaining holes: Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10x 1 GiB unallocated) Device 2: [__________] (10 GiB contig unallocated) Device 3: [__________] (10 GiB contig unallocated) Before f6fca3917b4d, the allocator would fill these optimally by allocating chunks with dev_extents on devices 1 and 2 ([12]), 1 and 3 ([13]), or 2 and 3 ([23]): [after 0 chunk allocations] Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10 GiB) Device 2: [__________] (10 GiB) Device 3: [__________] (10 GiB) [after 1 chunk allocation] Device 1: [12] [_] [_] [_] [_] [_] [_] [_] [_] [_] Device 2: [12] [_________] (9 GiB) Device 3: [__________] (10 GiB) [after 2 chunk allocations] Device 1: [12] [13] [_] [_] [_] [_] [_] [_] [_] [_] (8 GiB) Device 2: [12] [_________] (9 GiB) Device 3: [13] [_________] (9 GiB) [after 3 chunk allocations] Device 1: [12] [13] [12] [_] [_] [_] [_] [_] [_] [_] (7 GiB) Device 2: [12] [12] [________] (8 GiB) Device 3: [13] [_________] (9 GiB) [...] [after 12 chunk allocations] Device 1: [12] [13] [12] [13] [12] [13] [12] [13] [_] [_] (2 GiB) Device 2: [12] [12] [23] [23] [12] [12] [23] [23] [__] (2 GiB) Device 3: [13] [13] [23] [23] [13] [23] [13] [23] [__] (2 GiB) [after 13 chunk allocations] Device 1: [12] [13] [12] [13] [12] [13] [12] [13] [12] [_] (1 GiB) Device 2: [12] [12] [23] [23] [12] [12] [23] [23] [12] [_] (1 GiB) Device 3: [13] [13] [23] [23] [13] [23] [13] [23] [__] (2 GiB) [after 14 chunk allocations] Device 1: [12] [13] [12] [13] [12] [13] [12] [13] [12] [13] (full) Device 2: [12] [12] [23] [23] [12] [12] [23] [23] [12] [_] (1 GiB) Device 3: [13] [13] [23] [23] [13] [23] [13] [23] [13] [_] (1 GiB) [after 15 chunk allocations] Device 1: [12] [13] [12] [13] [12] [13] [12] [13] [12] [13] (full) Device 2: [12] [12] [23] [23] [12] [12] [23] [23] [12] [23] (full) Device 3: [13] [13] [23] [23] [13] [23] [13] [23] [13] [23] (full) This allocates all of the space with no waste. The sorting function used by gather_device_info considers free space holes above 1 GiB in length to be equal to 1 GiB, so once find_free_dev_extent locates a sufficiently long hole on each device, all the holes appear equal in the sort, and the comparison falls back to sorting devices by total free space. This keeps usable space on each device equal so they can all be filled completely. After f6fca3917b4d, the allocator prefers the devices with larger holes over the devices with more free space, so it makes bad allocation choices: [after 1 chunk allocation] Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10 GiB) Device 2: [23] [_________] (9 GiB) Device 3: [23] [_________] (9 GiB) [after 2 chunk allocations] Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10 GiB) Device 2: [23] [23] [________] (8 GiB) Device 3: [23] [23] [________] (8 GiB) [after 3 chunk allocations] Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10 GiB) Device 2: [23] [23] [23] [_______] (7 GiB) Device 3: [23] [23] [23] [_______] (7 GiB) [...] [after 9 chunk allocations] Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10 GiB) Device 2: [23] [23] [23] [23] [23] [23] [23] [23] [23] [_] (1 GiB) Device 3: [23] [23] [23] [23] [23] [23] [23] [23] [23] [_] (1 GiB) [after 10 chunk allocations] Device 1: [12] [_] [_] [_] [_] [_] [_] [_] [_] [_] (9 GiB) Device 2: [23] [23] [23] [23] [23] [23] [23] [23] [12] (full) Device 3: [23] [23] [23] [23] [23] [23] [23] [23] [_] (1 GiB) [after 11 chunk allocations] Device 1: [12] [13] [_] [_] [_] [_] [_] [_] [_] [_] (8 GiB) Device 2: [23] [23] [23] [23] [23] [23] [23] [23] [12] (full) Device 3: [23] [23] [23] [23] [23] [23] [23] [23] [13] (full) No further allocations are possible, with 8 GiB wasted (4 GiB of data space). The sort in gather_device_info now considers free space in holes longer than 1 GiB to be distinct, so it will prefer devices 2 and 3 over device 1 until all but 1 GiB is allocated on devices 2 and 3. At that point, with only 1 GiB unallocated on every device, the largest hole length on each device is equal at 1 GiB, so the sort finally moves to ordering the devices with the most free space, but by this time it is too late to make use of the free space on device 1. Note that it's possible to contrive a case where the pre-f6fca3917b4d allocator fails the same way, but these cases generally have extensive dev_extent fragmentation as a precondition (e.g. many holes of 768M in length on one device, and few holes 1 GiB in length on the others). With the regression in f6fca3917b4d, bad chunk allocation can occur even under optimal conditions, when all dev_extent holes are exact multiples of stripe_len in length, as in the example above. Also note that post-f6fca3917b4d kernels do treat dev_extent holes larger than 10 GiB as equal, so the bad behavior won't show up on a freshly formatted filesystem; however, as the filesystem ages and fills up, and holes ranging from 1 GiB to 10 GiB in size appear, the problem can show up as a failure to balance after adding or removing devices, or an unexpected shortfall in available space due to unequal allocation. To fix the regression and make data chunk allocation work again, set ctl->max_stripe_len back to the original SZ_1G, or space_info->chunk_size if that's smaller (the latter can happen if the user set space_info->chunk_size to less than 1 GiB via sysfs, or it's a 32 MiB system chunk with a hardcoded chunk_size and stripe_len). While researching the background of the earlier commits, I found that an identical fix was already proposed at: https://lore.kernel.org/linux-btrfs/de83ac46-a4a3-88d3-85ce-255b7abc5249@gmx.com/ The previous review missed one detail: ctl->max_stripe_len is used before decide_stripe_size_regular() is called, when it is too late for the changes in that function to have any effect. ctl->max_stripe_len is not used directly by decide_stripe_size_regular(), but the parameter does heavily influence the per-device free space data presented to the function. Fixes: f6fca3917b4d ("btrfs: store chunk size in space-info struct") CC: stable@vger.kernel.org # 6.1+ Link: https://lore.kernel.org/linux-btrfs/20231007051421.19657-1-ce3g8jdj@umail.furryterror.org/ Reviewed-by: Qu Wenruo Signed-off-by: Zygo Blaxell Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e3a3769fd92e..43fee429834a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5111,7 +5111,7 @@ static void init_alloc_chunk_ctl_policy_regular( ASSERT(space_info); ctl->max_chunk_size = READ_ONCE(space_info->chunk_size); - ctl->max_stripe_size = ctl->max_chunk_size; + ctl->max_stripe_size = min_t(u64, ctl->max_chunk_size, SZ_1G); if (ctl->type & BTRFS_BLOCK_GROUP_SYSTEM) ctl->devs_max = min_t(int, ctl->devs_max, BTRFS_MAX_DEVS_SYS_CHUNK); -- cgit v1.2.3 From 5720c43d5216b5dbd9ab25595f7c61e55d36d4fc Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Wed, 27 Sep 2023 13:52:46 +0800 Subject: virtio_net: fix the missing of the dma cpu sync Commit 295525e29a5b ("virtio_net: merge dma operations when filling mergeable buffers") unmaps the buffer with DMA_ATTR_SKIP_CPU_SYNC when the dma->ref is zero. We do that with DMA_ATTR_SKIP_CPU_SYNC, because we do not want to do the sync for the entire page_frag. But that misses the sync for the current area. This patch does cpu sync regardless of whether the ref is zero or not. Fixes: 295525e29a5b ("virtio_net: merge dma operations when filling mergeable buffers") Reported-by: Michael Roth Closes: http://lore.kernel.org/all/20230926130451.axgodaa6tvwqs3ut@amd.com Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: Linus Torvalds --- drivers/net/virtio_net.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fe7f314d65c9..d67f742fbd4c 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -607,16 +607,16 @@ static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) --dma->ref; - if (dma->ref) { - if (dma->need_sync && len) { - offset = buf - (head + sizeof(*dma)); + if (dma->need_sync && len) { + offset = buf - (head + sizeof(*dma)); - virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, offset, - len, DMA_FROM_DEVICE); - } + virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, + offset, len, + DMA_FROM_DEVICE); + } + if (dma->ref) return; - } virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); -- cgit v1.2.3 From fc8b2a619469378717e7270d2a4e1ef93c585f7a Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 11 Oct 2023 10:01:14 -0400 Subject: net: more strict VIRTIO_NET_HDR_GSO_UDP_L4 validation Syzbot reported two new paths to hit an internal WARNING using the new virtio gso type VIRTIO_NET_HDR_GSO_UDP_L4. RIP: 0010:skb_checksum_help+0x4a2/0x600 net/core/dev.c:3260 skb len=64521 gso_size=344 and RIP: 0010:skb_warn_bad_offload+0x118/0x240 net/core/dev.c:3262 Older virtio types have historically had loose restrictions, leading to many entirely impractical fuzzer generated packets causing problems deep in the kernel stack. Ideally, we would have had strict validation for all types from the start. New virtio types can have tighter validation. Limit UDP GSO packets inserted via virtio to the same limits imposed by the UDP_SEGMENT socket interface: 1. must use checksum offload 2. checksum offload matches UDP header 3. no more segments than UDP_MAX_SEGMENTS 4. UDP GSO does not take modifier flags, notably SKB_GSO_TCP_ECN Fixes: 860b7f27b8f7 ("linux/virtio_net.h: Support USO offload in vnet header.") Reported-by: syzbot+01cdbc31e9c0ae9b33ac@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/0000000000005039270605eb0b7f@google.com/ Reported-by: syzbot+c99d835ff081ca30f986@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/0000000000005426680605eb0b9f@google.com/ Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Acked-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 7b4dd69555e4..27cc1d464321 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -3,8 +3,8 @@ #define _LINUX_VIRTIO_NET_H #include +#include #include -#include #include static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) @@ -151,9 +151,22 @@ retry: unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); - /* UFO may not include transport header in gso_size. */ - if (gso_type & SKB_GSO_UDP) + switch (gso_type & ~SKB_GSO_TCP_ECN) { + case SKB_GSO_UDP: + /* UFO may not include transport header in gso_size. */ nh_off -= thlen; + break; + case SKB_GSO_UDP_L4: + if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) + return -EINVAL; + if (skb->csum_offset != offsetof(struct udphdr, check)) + return -EINVAL; + if (skb->len - p_off > gso_size * UDP_MAX_SEGMENTS) + return -EINVAL; + if (gso_type != SKB_GSO_UDP_L4) + return -EINVAL; + break; + } /* Kernel has a special handling for GSO_BY_FRAGS. */ if (gso_size == GSO_BY_FRAGS) -- cgit v1.2.3 From fbe1bf1e5ff1e3b298420d7a8434983ef8d72bd1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 Oct 2023 12:02:02 -0700 Subject: Revert "x86/smp: Put CPUs into INIT on shutdown if possible" This reverts commit 45e34c8af58f23db4474e2bfe79183efec09a18b, and the two subsequent fixes to it: 3f874c9b2aae ("x86/smp: Don't send INIT to non-present and non-booted CPUs") b1472a60a584 ("x86/smp: Don't send INIT to boot CPU") because it seems to result in hung machines at shutdown. Particularly some Dell machines, but Thomas says "The rest seems to be Lenovo and Sony with Alderlake/Raptorlake CPUs - at least that's what I could figure out from the various bug reports. I don't know which CPUs the DELL machines have, so I can't say it's a pattern. I agree with the revert for now" Ashok Raj chimes in: "There was a report (probably this same one), and it turns out it was a bug in the BIOS SMI handler. The client BIOS's were waiting for the lowest APICID to be the SMI rendevous master. If this is MeteorLake, the BSP wasn't the one with the lowest APIC and it triped here. The BIOS change is also being pushed to others for assimilation :) Server BIOS's had this correctly for a while now" and it does look likely to be some bad interaction between SMI and the non-BSP cores having put into INIT (and thus unresponsive until reset). Link: https://bbs.archlinux.org/viewtopic.php?pid=2124429 Link: https://www.reddit.com/r/openSUSE/comments/16qq99b/tumbleweed_shutdown_did_not_finish_completely/ Link: https://forum.artixlinux.org/index.php/topic,5997.0.html Link: https://bugzilla.redhat.com/show_bug.cgi?id=2241279 Acked-by: Thomas Gleixner Cc: Ashok Raj Signed-off-by: Linus Torvalds --- arch/x86/include/asm/smp.h | 1 - arch/x86/kernel/smp.c | 39 +++++++-------------------------------- arch/x86/kernel/smpboot.c | 27 --------------------------- 3 files changed, 7 insertions(+), 60 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index ad98dd1d9cfb..c31c633419fe 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -129,7 +129,6 @@ void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); -bool smp_park_other_cpus_in_init(void); void smp_store_cpu_info(int id); asmlinkage __visible void smp_reboot_interrupt(void); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 6eb06d001bcc..96a771f9f930 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -131,7 +131,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) } /* - * Disable virtualization, APIC etc. and park the CPU in a HLT loop + * this function calls the 'stop' function on all other CPUs in the system. */ DEFINE_IDTENTRY_SYSVEC(sysvec_reboot) { @@ -172,17 +172,13 @@ static void native_stop_other_cpus(int wait) * 2) Wait for all other CPUs to report that they reached the * HLT loop in stop_this_cpu() * - * 3) If the system uses INIT/STARTUP for CPU bringup, then - * send all present CPUs an INIT vector, which brings them - * completely out of the way. + * 3) If #2 timed out send an NMI to the CPUs which did not + * yet report * - * 4) If #3 is not possible and #2 timed out send an NMI to the - * CPUs which did not yet report - * - * 5) Wait for all other CPUs to report that they reached the + * 4) Wait for all other CPUs to report that they reached the * HLT loop in stop_this_cpu() * - * #4 can obviously race against a CPU reaching the HLT loop late. + * #3 can obviously race against a CPU reaching the HLT loop late. * That CPU will have reported already and the "have all CPUs * reached HLT" condition will be true despite the fact that the * other CPU is still handling the NMI. Again, there is no @@ -198,7 +194,7 @@ static void native_stop_other_cpus(int wait) /* * Don't wait longer than a second for IPI completion. The * wait request is not checked here because that would - * prevent an NMI/INIT shutdown in case that not all + * prevent an NMI shutdown attempt in case that not all * CPUs reach shutdown state. */ timeout = USEC_PER_SEC; @@ -206,27 +202,7 @@ static void native_stop_other_cpus(int wait) udelay(1); } - /* - * Park all other CPUs in INIT including "offline" CPUs, if - * possible. That's a safe place where they can't resume execution - * of HLT and then execute the HLT loop from overwritten text or - * page tables. - * - * The only downside is a broadcast MCE, but up to the point where - * the kexec() kernel brought all APs online again an MCE will just - * make HLT resume and handle the MCE. The machine crashes and burns - * due to overwritten text, page tables and data. So there is a - * choice between fire and frying pan. The result is pretty much - * the same. Chose frying pan until x86 provides a sane mechanism - * to park a CPU. - */ - if (smp_park_other_cpus_in_init()) - goto done; - - /* - * If park with INIT was not possible and the REBOOT_VECTOR didn't - * take all secondary CPUs offline, try with the NMI. - */ + /* if the REBOOT_VECTOR didn't work, try with the NMI */ if (!cpumask_empty(&cpus_stop_mask)) { /* * If NMI IPI is enabled, try to register the stop handler @@ -249,7 +225,6 @@ static void native_stop_other_cpus(int wait) udelay(1); } -done: local_irq_save(flags); disable_local_APIC(); mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 48e040618731..2a187c0cbd5b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1240,33 +1240,6 @@ void arch_thaw_secondary_cpus_end(void) cache_aps_init(); } -bool smp_park_other_cpus_in_init(void) -{ - unsigned int cpu, this_cpu = smp_processor_id(); - unsigned int apicid; - - if (apic->wakeup_secondary_cpu_64 || apic->wakeup_secondary_cpu) - return false; - - /* - * If this is a crash stop which does not execute on the boot CPU, - * then this cannot use the INIT mechanism because INIT to the boot - * CPU will reset the machine. - */ - if (this_cpu) - return false; - - for_each_cpu_and(cpu, &cpus_booted_once_mask, cpu_present_mask) { - if (cpu == this_cpu) - continue; - apicid = apic->cpu_present_to_apicid(cpu); - if (apicid == BAD_APICID) - continue; - send_init_sequence(apicid); - } - return true; -} - /* * Early setup to make printk work. */ -- cgit v1.2.3 From 92e37f20f20a23fec4626ae72eda50f127acb130 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 11 Oct 2023 15:49:36 -0400 Subject: selftests: openvswitch: Add version check for pyroute2 Paolo Abeni reports that on some systems the pyroute2 version isn't new enough to run the test suite. Ensure that we support a minimum version of 0.6 for all cases (which does include the existing ones). The 0.6.1 version was released in May of 2021, so should be propagated to most installations at this point. The alternative that Paolo proposed was to only skip when the add-flow is being run. This would be okay for most cases, except if a future test case is added that needs to do flow dump without an associated add (just guessing). In that case, it could also be broken and we would need additional skip logic anyway. Just draw a line in the sand now. Fixes: 25f16c873fb1 ("selftests: add openvswitch selftest suite") Reported-by: Paolo Abeni Closes: https://lore.kernel.org/lkml/8470c431e0930d2ea204a9363a60937289b7fdbe.camel@redhat.com/ Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- tools/testing/selftests/net/openvswitch/openvswitch.sh | 2 +- tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 9c2012d70b08..220c3356901e 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -525,7 +525,7 @@ run_test() { fi if python3 ovs-dpctl.py -h 2>&1 | \ - grep "Need to install the python" >/dev/null 2>&1; then + grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then stdbuf -o0 printf "TEST: %-60s [PYLIB]\n" "${tdesc}" return $ksft_skip fi diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 912dc8c49085..e4c24d5edf20 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -28,8 +28,10 @@ try: from pyroute2.netlink import nlmsg_atoms from pyroute2.netlink.exceptions import NetlinkError from pyroute2.netlink.generic import GenericNetlinkSocket + import pyroute2 + except ModuleNotFoundError: - print("Need to install the python pyroute2 package.") + print("Need to install the python pyroute2 package >= 0.6.") sys.exit(0) @@ -1998,6 +2000,12 @@ def main(argv): nlmsg_atoms.ovskey = ovskey nlmsg_atoms.ovsactions = ovsactions + # version check for pyroute2 + prverscheck = pyroute2.__version__.split(".") + if int(prverscheck[0]) == 0 and int(prverscheck[1]) < 6: + print("Need to upgrade the python pyroute2 package to >= 0.6.") + sys.exit(0) + parser = argparse.ArgumentParser() parser.add_argument( "-v", -- cgit v1.2.3 From af846afad5ca1c1a24d320adf9e48255e97db84e Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 11 Oct 2023 15:49:37 -0400 Subject: selftests: openvswitch: Catch cases where the tests are killed In case of fatal signal, or early abort at least cleanup the current test case. Fixes: 25f16c873fb1 ("selftests: add openvswitch selftest suite") Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- tools/testing/selftests/net/openvswitch/openvswitch.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 220c3356901e..2a0112be7ead 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -3,6 +3,8 @@ # # OVS kernel module self tests +trap ovs_exit_sig EXIT TERM INT ERR + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -- cgit v1.2.3 From 76035fd12cb9046be00ffb9d4262b5b3277d5068 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 11 Oct 2023 15:49:38 -0400 Subject: selftests: openvswitch: Skip drop testing on older kernels Kernels that don't have support for openvswitch drop reasons also won't have the drop counter reasons, so we should skip the test completely. It previously wasn't possible to build a test case for this without polluting the datapath, so we introduce a mechanism to clear all the flows from a datapath allowing us to test for explicit drop actions, and then clear the flows to build the original test case. Fixes: 4242029164d6 ("selftests: openvswitch: add explicit drop testcase") Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- .../selftests/net/openvswitch/openvswitch.sh | 17 +++++++++++ .../testing/selftests/net/openvswitch/ovs-dpctl.py | 34 ++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 2a0112be7ead..f8499d4c87f3 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -144,6 +144,12 @@ ovs_add_flow () { return 0 } +ovs_del_flows () { + info "Deleting all flows from DP: sbx:$1 br:$2" + ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py del-flows "$2" + return 0 +} + ovs_drop_record_and_run () { local sbx=$1 shift @@ -200,6 +206,17 @@ test_drop_reason() { ip netns exec server ip addr add 172.31.110.20/24 dev s1 ip netns exec server ip link set s1 up + # Check if drop reasons can be sent + ovs_add_flow "test_drop_reason" dropreason \ + 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(10)' 2>/dev/null + if [ $? == 1 ]; then + info "no support for drop reasons - skipping" + ovs_exit_sig + return $ksft_skip + fi + + ovs_del_flows "test_drop_reason" dropreason + # Allow ARP ovs_add_flow "test_drop_reason" dropreason \ 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index e4c24d5edf20..10b8f31548f8 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -1906,6 +1906,32 @@ class OvsFlow(GenericNetlinkSocket): raise ne return reply + def del_flows(self, dpifindex): + """ + Send a del message to the kernel that will drop all flows. + + dpifindex should be a valid datapath obtained by calling + into the OvsDatapath lookup + """ + + flowmsg = OvsFlow.ovs_flow_msg() + flowmsg["cmd"] = OVS_FLOW_CMD_DEL + flowmsg["version"] = OVS_DATAPATH_VERSION + flowmsg["reserved"] = 0 + flowmsg["dpifindex"] = dpifindex + + try: + reply = self.nlm_request( + flowmsg, + msg_type=self.prid, + msg_flags=NLM_F_REQUEST | NLM_F_ACK, + ) + reply = reply[0] + except NetlinkError as ne: + print(flowmsg) + raise ne + return reply + def dump(self, dpifindex, flowspec=None): """ Returns a list of messages containing flows. @@ -2068,6 +2094,9 @@ def main(argv): addflcmd.add_argument("flow", help="Flow specification") addflcmd.add_argument("acts", help="Flow actions") + delfscmd = subparsers.add_parser("del-flows") + delfscmd.add_argument("flsbr", help="Datapath name") + args = parser.parse_args() if args.verbose > 0: @@ -2151,6 +2180,11 @@ def main(argv): flow = OvsFlow.ovs_flow_msg() flow.parse(args.flow, args.acts, rep["dpifindex"]) ovsflow.add_flow(rep["dpifindex"], flow) + elif hasattr(args, "flsbr"): + rep = ovsdp.info(args.flsbr, 0) + if rep is None: + print("DP '%s' not found." % args.flsbr) + ovsflow.del_flows(rep["dpifindex"]) return 0 -- cgit v1.2.3 From 8eff0e062201e26739c74ac2355b7362622b7190 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 11 Oct 2023 15:49:39 -0400 Subject: selftests: openvswitch: Fix the ct_tuple for v4 The ct_tuple v4 data structure decode / encode routines were using the v6 IP address decode and relying on default encode. This could cause exceptions during encode / decode depending on how a ct4 tuple would appear in a netlink message. Caught during code review. Fixes: e52b07aa1a54 ("selftests: openvswitch: add flow dump support") Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 10b8f31548f8..b97e621face9 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -1119,12 +1119,14 @@ class ovskey(nla): "src", lambda x: str(ipaddress.IPv4Address(x)), int, + convert_ipv4, ), ( "dst", "dst", - lambda x: str(ipaddress.IPv6Address(x)), + lambda x: str(ipaddress.IPv4Address(x)), int, + convert_ipv4, ), ("tp_src", "tp_src", "%d", int), ("tp_dst", "tp_dst", "%d", int), -- cgit v1.2.3 From 58720809f52779dc0f08e53e54b014209d13eebb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 Oct 2023 13:34:39 -0700 Subject: Linux 6.6-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 88ebf6547964..a3e52e10840e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* -- cgit v1.2.3 From dc608db793731426938baa2f0e75a4a3cce5f5cf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 16 Oct 2023 14:19:52 +0300 Subject: fbdev: omapfb: fix some error codes Return negative -ENXIO instead of positive ENXIO. Signed-off-by: Dan Carpenter Signed-off-by: Helge Deller --- drivers/video/fbdev/omap/omapfb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index f28cb90947a3..42c96f1cfc93 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -1645,13 +1645,13 @@ static int omapfb_do_probe(struct platform_device *pdev, } fbdev->int_irq = platform_get_irq(pdev, 0); if (fbdev->int_irq < 0) { - r = ENXIO; + r = -ENXIO; goto cleanup; } fbdev->ext_irq = platform_get_irq(pdev, 1); if (fbdev->ext_irq < 0) { - r = ENXIO; + r = -ENXIO; goto cleanup; } -- cgit v1.2.3 From e638d3710f0e2483ce01fbc113da83ba3639489c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Oct 2023 22:04:40 +0200 Subject: fbdev: sa1100fb: mark sa1100fb_init() static This is a global function that is only referenced as an initcall. This causes a warning: drivers/video/fbdev/sa1100fb.c:1218:12: error: no previous prototype for 'sa1100fb_init' [-Werror=missing-prototypes] Make it static instead. Signed-off-by: Arnd Bergmann Signed-off-by: Helge Deller --- drivers/video/fbdev/sa1100fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/sa1100fb.c b/drivers/video/fbdev/sa1100fb.c index 3d76ce111488..cf0f706762b4 100644 --- a/drivers/video/fbdev/sa1100fb.c +++ b/drivers/video/fbdev/sa1100fb.c @@ -1214,7 +1214,7 @@ static struct platform_driver sa1100fb_driver = { }, }; -int __init sa1100fb_init(void) +static int __init sa1100fb_init(void) { if (fb_get_options("sa1100fb", NULL)) return -ENODEV; -- cgit v1.2.3 From 0c37bffaaebe1733433b480d612282169632a31a Mon Sep 17 00:00:00 2001 From: Jorge Maidana Date: Fri, 6 Oct 2023 17:43:46 -0300 Subject: fbdev: uvesafb: Remove uvesafb_exec() prototype from include/video/uvesafb.h uvesafb_exec() is a static function defined and called only in drivers/video/fbdev/uvesafb.c, remove the prototype from include/video/uvesafb.h. Fixes the warning: ./include/video/uvesafb.h:112:12: warning: 'uvesafb_exec' declared 'static' but never defined [-Wunused-function] when including '