From 91abab83839aa2eba073e4a63c729832fdb27ea1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 1 Jul 2019 17:03:29 -0400 Subject: XArray: Fix xas_next() with a single entry at 0 If there is only a single entry at 0, the first time we call xas_next(), we return the entry. Unfortunately, all subsequent times we call xas_next(), we also return the entry at 0 instead of noticing that the xa_index is now greater than zero. This broke find_get_pages_contig(). Fixes: 64d3e9a9e0cc ("xarray: Step through an XArray") Reported-by: Kent Overstreet Signed-off-by: Matthew Wilcox (Oracle) --- lib/test_xarray.c | 24 ++++++++++++++++++++++++ lib/xarray.c | 4 ++++ 2 files changed, 28 insertions(+) diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 9d631a7b6a70..7df4f7f395bf 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1110,6 +1110,28 @@ static noinline void check_find_entry(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_move_tiny(struct xarray *xa) +{ + XA_STATE(xas, xa, 0); + + XA_BUG_ON(xa, !xa_empty(xa)); + rcu_read_lock(); + XA_BUG_ON(xa, xas_next(&xas) != NULL); + XA_BUG_ON(xa, xas_next(&xas) != NULL); + rcu_read_unlock(); + xa_store_index(xa, 0, GFP_KERNEL); + rcu_read_lock(); + xas_set(&xas, 0); + XA_BUG_ON(xa, xas_next(&xas) != xa_mk_index(0)); + XA_BUG_ON(xa, xas_next(&xas) != NULL); + xas_set(&xas, 0); + XA_BUG_ON(xa, xas_prev(&xas) != xa_mk_index(0)); + XA_BUG_ON(xa, xas_prev(&xas) != NULL); + rcu_read_unlock(); + xa_erase_index(xa, 0); + XA_BUG_ON(xa, !xa_empty(xa)); +} + static noinline void check_move_small(struct xarray *xa, unsigned long idx) { XA_STATE(xas, xa, 0); @@ -1217,6 +1239,8 @@ static noinline void check_move(struct xarray *xa) xa_destroy(xa); + check_move_tiny(xa); + for (i = 0; i < 16; i++) check_move_small(xa, 1UL << i); diff --git a/lib/xarray.c b/lib/xarray.c index 446b956c9188..1237c213f52b 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -994,6 +994,8 @@ void *__xas_prev(struct 
xa_state *xas) if (!xas_frozen(xas->xa_node)) xas->xa_index--; + if (!xas->xa_node) + return set_bounds(xas); if (xas_not_node(xas->xa_node)) return xas_load(xas); @@ -1031,6 +1033,8 @@ void *__xas_next(struct xa_state *xas) if (!xas_frozen(xas->xa_node)) xas->xa_index++; + if (!xas->xa_node) + return set_bounds(xas); if (xas_not_node(xas->xa_node)) return xas_load(xas); -- cgit v1.2.3 From 44b09b11b813b8550e6b976ea51593bc23bba8d1 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 21 Sep 2019 17:04:11 +0200 Subject: clk: meson: gxbb: let sar_adc_clk_div set the parent clock rate The meson-saradc driver manually sets the input clock for sar_adc_clk_sel. Update the GXBB clock driver (which is used on GXBB, GXL and GXM) so the rate settings on sar_adc_clk_div are propagated up to sar_adc_clk_sel which will let the common clock framework select the best matching parent clock if we want that. This makes sar_adc_clk_div consistent with the axg-aoclk and g12a-aoclk drivers, which both also specify CLK_SET_RATE_PARENT. 
Fixes: 33d0fcdfe0e870 ("clk: gxbb: add the SAR ADC clocks and expose them") Signed-off-by: Martin Blumenstingl Signed-off-by: Jerome Brunet --- drivers/clk/meson/gxbb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c index 7cfb998eeb3e..1f9c056e684c 100644 --- a/drivers/clk/meson/gxbb.c +++ b/drivers/clk/meson/gxbb.c @@ -935,6 +935,7 @@ static struct clk_regmap gxbb_sar_adc_clk_div = { &gxbb_sar_adc_clk_sel.hw }, .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, }, }; -- cgit v1.2.3 From 4a079643fc73247667000ba54fbccc2acadb04a5 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 19 Sep 2019 11:36:25 +0200 Subject: clk: meson: g12a: fix cpu clock rate setting CLK_SET_RATE_NO_REPARENT is wrongly set on the g12a cpu premux0 clocks flags, and CLK_SET_RATE_PARENT is required for the g12a cpu premux0 clock and the g12b cpub premux0 clock, otherwise CCF always selects the SYS_PLL clock to feed the cpu cluster. Fixes: ffae8475b90c ("clk: meson: g12a: add notifiers to handle cpu clock change") Signed-off-by: Neil Armstrong Signed-off-by: Jerome Brunet --- drivers/clk/meson/g12a.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c index ea4c791f106d..33c7e04b4a82 100644 --- a/drivers/clk/meson/g12a.c +++ b/drivers/clk/meson/g12a.c @@ -353,8 +353,7 @@ static struct clk_regmap g12a_cpu_clk_premux0 = { { .hw = &g12a_fclk_div3.hw }, }, .num_parents = 3, - /* This sub-tree is used a parking clock */ - .flags = CLK_SET_RATE_NO_REPARENT, + .flags = CLK_SET_RATE_PARENT, }, }; @@ -533,6 +532,7 @@ static struct clk_regmap g12b_cpub_clk_premux0 = { { .hw = &g12a_fclk_div3.hw }, }, .num_parents = 3, + .flags = CLK_SET_RATE_PARENT, }, }; -- cgit v1.2.3 From 90b171f6035688236a3f09117a683020be45603a Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 19 Sep 2019 11:36:26 +0200 Subject: clk: meson: g12a: set CLK_MUX_ROUND_CLOSEST on the cpu clock muxes When 
setting the 100MHz, 500MHz, 666MHz and 1GHz rate for CPU clocks, CCF will use the SYS_PLL to handle these frequencies, but: - using FIXED_PLL derived FCLK_DIV2/DIV3 clocks is more precise - the Amlogic G12A/G12B/SM1 Suspend handling in firmware doesn't handle entering suspend using SYS_PLL for these frequencies Adding CLK_MUX_ROUND_CLOSEST on all the muxes of the non-SYS_PLL cpu clock tree helps CCF always selecting the FCLK_DIV2/DIV3 as source for these frequencies. Fixes: ffae8475b90c ("clk: meson: g12a: add notifiers to handle cpu clock change") Signed-off-by: Neil Armstrong Signed-off-by: Jerome Brunet --- drivers/clk/meson/g12a.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c index 33c7e04b4a82..b3af61cc6fb9 100644 --- a/drivers/clk/meson/g12a.c +++ b/drivers/clk/meson/g12a.c @@ -343,6 +343,7 @@ static struct clk_regmap g12a_cpu_clk_premux0 = { .offset = HHI_SYS_CPU_CLK_CNTL0, .mask = 0x3, .shift = 0, + .flags = CLK_MUX_ROUND_CLOSEST, }, .hw.init = &(struct clk_init_data){ .name = "cpu_clk_dyn0_sel", @@ -409,6 +410,7 @@ static struct clk_regmap g12a_cpu_clk_postmux0 = { .offset = HHI_SYS_CPU_CLK_CNTL0, .mask = 0x1, .shift = 2, + .flags = CLK_MUX_ROUND_CLOSEST, }, .hw.init = &(struct clk_init_data){ .name = "cpu_clk_dyn0", @@ -465,6 +467,7 @@ static struct clk_regmap g12a_cpu_clk_dyn = { .offset = HHI_SYS_CPU_CLK_CNTL0, .mask = 0x1, .shift = 10, + .flags = CLK_MUX_ROUND_CLOSEST, }, .hw.init = &(struct clk_init_data){ .name = "cpu_clk_dyn", @@ -484,6 +487,7 @@ static struct clk_regmap g12a_cpu_clk = { .offset = HHI_SYS_CPU_CLK_CNTL0, .mask = 0x1, .shift = 11, + .flags = CLK_MUX_ROUND_CLOSEST, }, .hw.init = &(struct clk_init_data){ .name = "cpu_clk", @@ -503,6 +507,7 @@ static struct clk_regmap g12b_cpu_clk = { .offset = HHI_SYS_CPU_CLK_CNTL0, .mask = 0x1, .shift = 11, + .flags = CLK_MUX_ROUND_CLOSEST, }, .hw.init = &(struct clk_init_data){ .name = "cpu_clk", @@ -522,6 +527,7 @@ static struct 
clk_regmap g12b_cpub_clk_premux0 = { .offset = HHI_SYS_CPUB_CLK_CNTL, .mask = 0x3, .shift = 0, + .flags = CLK_MUX_ROUND_CLOSEST, }, .hw.init = &(struct clk_init_data){ .name = "cpub_clk_dyn0_sel", @@ -567,6 +573,7 @@ static struct clk_regmap g12b_cpub_clk_postmux0 = { .offset = HHI_SYS_CPUB_CLK_CNTL, .mask = 0x1, .shift = 2, + .flags = CLK_MUX_ROUND_CLOSEST, }, .hw.init = &(struct clk_init_data){ .name = "cpub_clk_dyn0", @@ -644,6 +651,7 @@ static struct clk_regmap g12b_cpub_clk_dyn = { .offset = HHI_SYS_CPUB_CLK_CNTL, .mask = 0x1, .shift = 10, + .flags = CLK_MUX_ROUND_CLOSEST, }, .hw.init = &(struct clk_init_data){ .name = "cpub_clk_dyn", @@ -663,6 +671,7 @@ static struct clk_regmap g12b_cpub_clk = { .offset = HHI_SYS_CPUB_CLK_CNTL, .mask = 0x1, .shift = 11, + .flags = CLK_MUX_ROUND_CLOSEST, }, .hw.init = &(struct clk_init_data){ .name = "cpub_clk", -- cgit v1.2.3 From b3a81c777dcb093020680490ab970d85e2f6f04f Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Fri, 23 Aug 2019 21:15:27 +0200 Subject: HID: fix error message in hid_open_report() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On HID report descriptor parsing error the code displays bogus pointer instead of error offset (subtracts start=NULL from end). Make the message more useful by displaying correct error offset and include total buffer size for reference. This was carried over from ancient times - "Fixed" commit just promoted the message from DEBUG to ERROR. 
Cc: stable@vger.kernel.org Fixes: 8c3d52fc393b ("HID: make parser more verbose about parsing errors by default") Signed-off-by: Michał Mirosław Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 3eaee2c37931..63fdbf09b044 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1139,6 +1139,7 @@ int hid_open_report(struct hid_device *device) __u8 *start; __u8 *buf; __u8 *end; + __u8 *next; int ret; static int (*dispatch_type[])(struct hid_parser *parser, struct hid_item *item) = { @@ -1192,7 +1193,8 @@ int hid_open_report(struct hid_device *device) device->collection_size = HID_DEFAULT_NUM_COLLECTIONS; ret = -EINVAL; - while ((start = fetch_item(start, end, &item)) != NULL) { + while ((next = fetch_item(start, end, &item)) != NULL) { + start = next; if (item.format != HID_ITEM_FORMAT_SHORT) { hid_err(device, "unexpected long global item\n"); @@ -1230,7 +1232,8 @@ int hid_open_report(struct hid_device *device) } } - hid_err(device, "item fetching failed at offset %d\n", (int)(end - start)); + hid_err(device, "item fetching failed at offset %u/%u\n", + size - (unsigned int)(end - start), size); err: kfree(parser->collection_stack); alloc_err: -- cgit v1.2.3 From fe2199cfd1516e90e03c033c52c9a28da09d9986 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 5 Sep 2019 17:54:06 +0100 Subject: HID: prodikeys: make array keys static const, makes object smaller Don't populate the array keys on the stack but instead make it static const. Makes the object code smaller by 166 bytes. 
Before: text data bss dec hex filename 18931 5872 480 25283 62c3 drivers/hid/hid-prodikeys.o After: text data bss dec hex filename 18669 5968 480 25117 621d drivers/hid/hid-prodikeys.o (gcc version 9.2.1, amd64) Signed-off-by: Colin Ian King Signed-off-by: Jiri Kosina --- drivers/hid/hid-prodikeys.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c index 5a3b3d974d84..2666af02d5c1 100644 --- a/drivers/hid/hid-prodikeys.c +++ b/drivers/hid/hid-prodikeys.c @@ -516,7 +516,7 @@ static void pcmidi_setup_extra_keys( MY PICTURES => KEY_WORDPROCESSOR MY MUSIC=> KEY_SPREADSHEET */ - unsigned int keys[] = { + static const unsigned int keys[] = { KEY_FN, KEY_MESSENGER, KEY_CALENDAR, KEY_ADDRESSBOOK, KEY_DOCUMENTS, @@ -532,7 +532,7 @@ static void pcmidi_setup_extra_keys( 0 }; - unsigned int *pkeys = &keys[0]; + const unsigned int *pkeys = &keys[0]; unsigned short i; if (pm->ifnum != 1) /* only set up ONCE for interace 1 */ -- cgit v1.2.3 From d9d4b1e46d9543a82c23f6df03f4ad697dab361b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 3 Oct 2019 14:53:59 -0400 Subject: HID: Fix assumption that devices have inputs The syzbot fuzzer found a slab-out-of-bounds write bug in the hid-gaff driver. The problem is caused by the driver's assumption that the device must have an input report. While this will be true for all normal HID input devices, a suitably malicious device can violate the assumption. The same assumption is present in over a dozen other HID drivers. This patch fixes them by checking that the list of hid_inputs for the hid_device is nonempty before allowing it to be used. 
Reported-and-tested-by: syzbot+403741a091bf41d4ae79@syzkaller.appspotmail.com Signed-off-by: Alan Stern CC: Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-axff.c | 11 +++++++++-- drivers/hid/hid-dr.c | 12 +++++++++--- drivers/hid/hid-emsff.c | 12 +++++++++--- drivers/hid/hid-gaff.c | 12 +++++++++--- drivers/hid/hid-holtekff.c | 12 +++++++++--- drivers/hid/hid-lg2ff.c | 12 +++++++++--- drivers/hid/hid-lg3ff.c | 11 +++++++++-- drivers/hid/hid-lg4ff.c | 11 +++++++++-- drivers/hid/hid-lgff.c | 11 +++++++++-- drivers/hid/hid-logitech-hidpp.c | 11 +++++++++-- drivers/hid/hid-microsoft.c | 12 +++++++++--- drivers/hid/hid-sony.c | 12 +++++++++--- drivers/hid/hid-tmff.c | 12 +++++++++--- drivers/hid/hid-zpff.c | 12 +++++++++--- 14 files changed, 126 insertions(+), 37 deletions(-) diff --git a/drivers/hid/hid-axff.c b/drivers/hid/hid-axff.c index 6654c1550e2e..fbe4e16ab029 100644 --- a/drivers/hid/hid-axff.c +++ b/drivers/hid/hid-axff.c @@ -63,13 +63,20 @@ static int axff_init(struct hid_device *hid) { struct axff_device *axff; struct hid_report *report; - struct hid_input *hidinput = list_first_entry(&hid->inputs, struct hid_input, list); + struct hid_input *hidinput; struct list_head *report_list =&hid->report_enum[HID_OUTPUT_REPORT].report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int field_count = 0; int i, j; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_first_entry(&hid->inputs, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; diff --git a/drivers/hid/hid-dr.c b/drivers/hid/hid-dr.c index 17e17f9a597b..947f19f8685f 100644 --- a/drivers/hid/hid-dr.c +++ b/drivers/hid/hid-dr.c @@ -75,13 +75,19 @@ static int drff_init(struct hid_device *hid) { struct drff_device *drff; struct hid_report *report; - struct hid_input *hidinput = list_first_entry(&hid->inputs, - struct 
hid_input, list); + struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_first_entry(&hid->inputs, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; diff --git a/drivers/hid/hid-emsff.c b/drivers/hid/hid-emsff.c index 7cd5651872d3..c34f2e5a049f 100644 --- a/drivers/hid/hid-emsff.c +++ b/drivers/hid/hid-emsff.c @@ -47,13 +47,19 @@ static int emsff_init(struct hid_device *hid) { struct emsff_device *emsff; struct hid_report *report; - struct hid_input *hidinput = list_first_entry(&hid->inputs, - struct hid_input, list); + struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_first_entry(&hid->inputs, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; diff --git a/drivers/hid/hid-gaff.c b/drivers/hid/hid-gaff.c index 0f95c96b70f8..ecbd3995a4eb 100644 --- a/drivers/hid/hid-gaff.c +++ b/drivers/hid/hid-gaff.c @@ -64,14 +64,20 @@ static int gaff_init(struct hid_device *hid) { struct gaff_device *gaff; struct hid_report *report; - struct hid_input *hidinput = list_entry(hid->inputs.next, - struct hid_input, list); + struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct list_head *report_ptr = report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs 
found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; diff --git a/drivers/hid/hid-holtekff.c b/drivers/hid/hid-holtekff.c index 10a720558830..8619b80c834c 100644 --- a/drivers/hid/hid-holtekff.c +++ b/drivers/hid/hid-holtekff.c @@ -124,13 +124,19 @@ static int holtekff_init(struct hid_device *hid) { struct holtekff_device *holtekff; struct hid_report *report; - struct hid_input *hidinput = list_entry(hid->inputs.next, - struct hid_input, list); + struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output report found\n"); return -ENODEV; diff --git a/drivers/hid/hid-lg2ff.c b/drivers/hid/hid-lg2ff.c index dd1a6c3a7de6..73d07e35f12a 100644 --- a/drivers/hid/hid-lg2ff.c +++ b/drivers/hid/hid-lg2ff.c @@ -50,11 +50,17 @@ int lg2ff_init(struct hid_device *hid) { struct lg2ff_device *lg2ff; struct hid_report *report; - struct hid_input *hidinput = list_entry(hid->inputs.next, - struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + /* Check that the report looks ok */ report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7); if (!report) diff --git a/drivers/hid/hid-lg3ff.c b/drivers/hid/hid-lg3ff.c index 9ecb6fd06203..b7e1949f3cf7 100644 --- a/drivers/hid/hid-lg3ff.c +++ 
b/drivers/hid/hid-lg3ff.c @@ -117,12 +117,19 @@ static const signed short ff3_joystick_ac[] = { int lg3ff_init(struct hid_device *hid) { - struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; const signed short *ff_bits = ff3_joystick_ac; int error; int i; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + /* Check that the report looks ok */ if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 35)) return -ENODEV; diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c index 03f0220062ca..5e6a0cef2a06 100644 --- a/drivers/hid/hid-lg4ff.c +++ b/drivers/hid/hid-lg4ff.c @@ -1253,8 +1253,8 @@ static int lg4ff_handle_multimode_wheel(struct hid_device *hid, u16 *real_produc int lg4ff_init(struct hid_device *hid) { - struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct hid_report *report = list_entry(report_list->next, struct hid_report, list); const struct usb_device_descriptor *udesc = &(hid_to_usb_dev(hid)->descriptor); @@ -1266,6 +1266,13 @@ int lg4ff_init(struct hid_device *hid) int mmode_ret, mmode_idx = -1; u16 real_product_id; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + /* Check that the report looks ok */ if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7)) return -1; diff --git a/drivers/hid/hid-lgff.c b/drivers/hid/hid-lgff.c index c79a6ec43745..aed4ddc397a9 100644 --- a/drivers/hid/hid-lgff.c +++ b/drivers/hid/hid-lgff.c @@ -115,12 
+115,19 @@ static void hid_lgff_set_autocenter(struct input_dev *dev, u16 magnitude) int lgff_init(struct hid_device* hid) { - struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; const signed short *ff_bits = ff_joystick; int error; int i; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + /* Check that the report looks ok */ if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7)) return -ENODEV; diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 0179f7ed77e5..1ac1ecc1e67c 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -2084,8 +2084,8 @@ static void hidpp_ff_destroy(struct ff_device *ff) static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) { struct hid_device *hid = hidpp->hid_dev; - struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; const struct usb_device_descriptor *udesc = &(hid_to_usb_dev(hid)->descriptor); const u16 bcdDevice = le16_to_cpu(udesc->bcdDevice); struct ff_device *ff; @@ -2094,6 +2094,13 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) int error, j, num_slots; u8 version; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + if (!dev) { hid_err(hid, "Struct input_dev not set!\n"); return -EINVAL; diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c index 2cf83856f2e4..2d8b589201a4 100644 --- a/drivers/hid/hid-microsoft.c +++ b/drivers/hid/hid-microsoft.c @@ -328,11 +328,17 @@ static int 
ms_play_effect(struct input_dev *dev, void *data, static int ms_init_ff(struct hid_device *hdev) { - struct hid_input *hidinput = list_entry(hdev->inputs.next, - struct hid_input, list); - struct input_dev *input_dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *input_dev; struct ms_data *ms = hid_get_drvdata(hdev); + if (list_empty(&hdev->inputs)) { + hid_err(hdev, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hdev->inputs.next, struct hid_input, list); + input_dev = hidinput->input; + if (!(ms->quirks & MS_QUIRK_FF)) return 0; diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 73c0f7a95e2d..4c6ed6ef31f1 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -2254,9 +2254,15 @@ static int sony_play_effect(struct input_dev *dev, void *data, static int sony_init_ff(struct sony_sc *sc) { - struct hid_input *hidinput = list_entry(sc->hdev->inputs.next, - struct hid_input, list); - struct input_dev *input_dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *input_dev; + + if (list_empty(&sc->hdev->inputs)) { + hid_err(sc->hdev, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(sc->hdev->inputs.next, struct hid_input, list); + input_dev = hidinput->input; input_set_capability(input_dev, EV_FF, FF_RUMBLE); return input_ff_create_memless(input_dev, NULL, sony_play_effect); diff --git a/drivers/hid/hid-tmff.c b/drivers/hid/hid-tmff.c index bdfc5ff3b2c5..90acef304536 100644 --- a/drivers/hid/hid-tmff.c +++ b/drivers/hid/hid-tmff.c @@ -124,12 +124,18 @@ static int tmff_init(struct hid_device *hid, const signed short *ff_bits) struct tmff_device *tmff; struct hid_report *report; struct list_head *report_list; - struct hid_input *hidinput = list_entry(hid->inputs.next, - struct hid_input, list); - struct input_dev *input_dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *input_dev; int error; int i; + if (list_empty(&hid->inputs)) { + 
hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + input_dev = hidinput->input; + tmff = kzalloc(sizeof(struct tmff_device), GFP_KERNEL); if (!tmff) return -ENOMEM; diff --git a/drivers/hid/hid-zpff.c b/drivers/hid/hid-zpff.c index f90959e94028..3abaca045869 100644 --- a/drivers/hid/hid-zpff.c +++ b/drivers/hid/hid-zpff.c @@ -54,11 +54,17 @@ static int zpff_init(struct hid_device *hid) { struct zpff_device *zpff; struct hid_report *report; - struct hid_input *hidinput = list_entry(hid->inputs.next, - struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; int i, error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + for (i = 0; i < 4; i++) { report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, i, 1); if (!report) -- cgit v1.2.3 From 2200ab6a7403f4fcd052c55ca328fc942f9392b6 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Tue, 24 Sep 2019 10:39:09 +0000 Subject: clk: at91: sam9x60: fix programmable clock The prescaler mask for sam9x60 must be 0xff (8 bits). Being set to 0, means that we cannot set any prescaler, thus the programmable clocks do not work (except the case with prescaler 0) Set the mask accordingly in layout struct. 
Fixes: 01e2113de9a5 ("clk: at91: add sam9x60 pmc driver") Signed-off-by: Eugen Hristev Link: https://lkml.kernel.org/r/1569321191-27606-1-git-send-email-eugen.hristev@microchip.com Acked-by: Nicolas Ferre Acked-by: Alexandre Belloni Signed-off-by: Stephen Boyd --- drivers/clk/at91/sam9x60.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c index 9790ddfa5b3c..86238d5ecb4d 100644 --- a/drivers/clk/at91/sam9x60.c +++ b/drivers/clk/at91/sam9x60.c @@ -43,6 +43,7 @@ static const struct clk_pll_characteristics upll_characteristics = { }; static const struct clk_programmable_layout sam9x60_programmable_layout = { + .pres_mask = 0xff, .pres_shift = 8, .css_mask = 0x1f, .have_slck_mck = 0, -- cgit v1.2.3 From 9e4dbc4646a84b2562ea7c64a542740687ff7daf Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Thu, 3 Oct 2019 11:17:59 +0800 Subject: HID: google: add magnemite/masterball USB ids Add 2 additional hammer-like devices. Signed-off-by: Nicolas Boichat Signed-off-by: Jiri Kosina --- drivers/hid/hid-google-hammer.c | 4 ++++ drivers/hid/hid-ids.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index 84f8c127ebdc..d86a9189e88f 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -469,6 +469,10 @@ static int hammer_probe(struct hid_device *hdev, static const struct hid_device_id hammer_devices[] = { { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_HAMMER) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MAGNEMITE) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MASTERBALL) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_STAFF) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 76969a22b0f2..447e8db21174 
100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -476,6 +476,8 @@ #define USB_DEVICE_ID_GOOGLE_STAFF 0x502b #define USB_DEVICE_ID_GOOGLE_WAND 0x502d #define USB_DEVICE_ID_GOOGLE_WHISKERS 0x5030 +#define USB_DEVICE_ID_GOOGLE_MASTERBALL 0x503c +#define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d #define USB_VENDOR_ID_GOTOP 0x08f2 #define USB_DEVICE_ID_SUPER_Q2 0x007f -- cgit v1.2.3 From 24e1eb5c0d78cfb9750b690bbe997d4d59170258 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Tue, 8 Oct 2019 17:15:37 +0300 Subject: iio: imu: adis16480: make sure provided frequency is positive It could happen that either `val` or `val2` [provided from userspace] is negative. In that case the computed frequency could get a weird value. Fix this by checking that neither of the 2 variables is negative, and check that the computed result is not-zero. Fixes: e4f959390178 ("iio: imu: adis16480 switch sampling frequency attr to core support") Signed-off-by: Alexandru Ardelean Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis16480.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c index b99d73887c9f..8743b2f376e2 100644 --- a/drivers/iio/imu/adis16480.c +++ b/drivers/iio/imu/adis16480.c @@ -317,8 +317,11 @@ static int adis16480_set_freq(struct iio_dev *indio_dev, int val, int val2) struct adis16480 *st = iio_priv(indio_dev); unsigned int t, reg; + if (val < 0 || val2 < 0) + return -EINVAL; + t = val * 1000 + val2 / 1000; - if (t <= 0) + if (t == 0) return -EINVAL; /* -- cgit v1.2.3 From 431f7667bd6889a274913162dfd19cce9d84848e Mon Sep 17 00:00:00 2001 From: Andreas Klinger Date: Sun, 6 Oct 2019 16:29:56 +0200 Subject: iio: srf04: fix wrong limitation in distance measuring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The measured time value in the driver is limited to the maximum distance which can be read by the sensor. 
This limitation was wrong and is fixed by this patch. It also takes into account that we are supporting a variety of sensors today and that the recently added sensors have a higher maximum distance range. Changes in v2: - Added a Tested-by Suggested-by: Zbyněk Kocur Tested-by: Zbyněk Kocur Signed-off-by: Andreas Klinger Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/srf04.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/iio/proximity/srf04.c b/drivers/iio/proximity/srf04.c index 8b50d56b0a03..01eb8cc63076 100644 --- a/drivers/iio/proximity/srf04.c +++ b/drivers/iio/proximity/srf04.c @@ -110,7 +110,7 @@ static int srf04_read(struct srf04_data *data) udelay(data->cfg->trigger_pulse_us); gpiod_set_value(data->gpiod_trig, 0); - /* it cannot take more than 20 ms */ + /* it should not take more than 20 ms until echo is rising */ ret = wait_for_completion_killable_timeout(&data->rising, HZ/50); if (ret < 0) { mutex_unlock(&data->lock); @@ -120,7 +120,8 @@ static int srf04_read(struct srf04_data *data) return -ETIMEDOUT; } - ret = wait_for_completion_killable_timeout(&data->falling, HZ/50); + /* it cannot take more than 50 ms until echo is falling */ + ret = wait_for_completion_killable_timeout(&data->falling, HZ/20); if (ret < 0) { mutex_unlock(&data->lock); return ret; @@ -135,19 +136,19 @@ static int srf04_read(struct srf04_data *data) dt_ns = ktime_to_ns(ktime_dt); /* - * measuring more than 3 meters is beyond the capabilities of - * the sensor + * measuring more than 6,45 meters is beyond the capabilities of + * the supported sensors * ==> filter out invalid results for not measuring echos of * another us sensor * * formula: - * distance 3 m - * time = ---------- = --------- = 9404389 ns - * speed 319 m/s + * distance 6,45 * 2 m + * time = ---------- = ------------ = 40438871 ns + * speed 319 m/s * * using a minimum speed at -20 °C of 319 m/s */ - if (dt_ns > 9404389) + if (dt_ns > 40438871) 
return -EIO; time_ns = dt_ns; @@ -159,20 +160,20 @@ static int srf04_read(struct srf04_data *data) * with Temp in °C * and speed in m/s * - * use 343 m/s as ultrasonic speed at 20 °C here in absence of the + * use 343,5 m/s as ultrasonic speed at 20 °C here in absence of the * temperature * * therefore: - * time 343 - * distance = ------ * ----- - * 10^6 2 + * time 343,5 time * 106 + * distance = ------ * ------- = ------------ + * 10^6 2 617176 * with time in ns * and distance in mm (one way) * - * because we limit to 3 meters the multiplication with 343 just + * because we limit to 6,45 meters the multiplication with 106 just * fits into 32 bit */ - distance_mm = time_ns * 343 / 2000000; + distance_mm = time_ns * 106 / 617176; return distance_mm; } -- cgit v1.2.3 From a8d23cbbf6c9f515ed678204ad2962be7c336344 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 3 Oct 2019 17:02:01 +0200 Subject: batman-adv: Avoid free/alloc race when handling OGM2 buffer A B.A.T.M.A.N. V virtual interface has an OGM2 packet buffer which is initialized using data from the netdevice notifier and other rtnetlink related hooks. It is sent regularly via various slave interfaces of the batadv virtual interface and in this process also modified (realloced) to integrate additional state information via TVLV containers. It must be avoided that the worker item is executed without a common lock with the netdevice notifier/rtnetlink helpers. Otherwise it can either happen that half modified data is sent out or the functions modifying the OGM2 buffer try to access already freed memory regions. 
Fixes: 0da0035942d4 ("batman-adv: OGMv2 - add basic infrastructure") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_ogm.c | 41 +++++++++++++++++++++++++++++++++-------- net/batman-adv/types.h | 4 ++++ 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index dc4f7430cb5a..8033f24f506c 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -256,14 +257,12 @@ static void batadv_v_ogm_queue_on_if(struct sk_buff *skb, } /** - * batadv_v_ogm_send() - periodic worker broadcasting the own OGM - * @work: work queue item + * batadv_v_ogm_send_softif() - periodic worker broadcasting the own OGM + * @bat_priv: the bat priv with all the soft interface information */ -static void batadv_v_ogm_send(struct work_struct *work) +static void batadv_v_ogm_send_softif(struct batadv_priv *bat_priv) { struct batadv_hard_iface *hard_iface; - struct batadv_priv_bat_v *bat_v; - struct batadv_priv *bat_priv; struct batadv_ogm2_packet *ogm_packet; struct sk_buff *skb, *skb_tmp; unsigned char *ogm_buff; @@ -271,8 +270,7 @@ static void batadv_v_ogm_send(struct work_struct *work) u16 tvlv_len = 0; int ret; - bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work); - bat_priv = container_of(bat_v, struct batadv_priv, bat_v); + lockdep_assert_held(&bat_priv->bat_v.ogm_buff_mutex); if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) goto out; @@ -363,6 +361,23 @@ out: return; } +/** + * batadv_v_ogm_send() - periodic worker broadcasting the own OGM + * @work: work queue item + */ +static void batadv_v_ogm_send(struct work_struct *work) +{ + struct batadv_priv_bat_v *bat_v; + struct batadv_priv *bat_priv; + + bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work); + bat_priv = container_of(bat_v, struct batadv_priv, bat_v); + + 
mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); + batadv_v_ogm_send_softif(bat_priv); + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); +} + /** * batadv_v_ogm_aggr_work() - OGM queue periodic task per interface * @work: work queue item @@ -424,11 +439,15 @@ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface) struct batadv_priv *bat_priv = netdev_priv(primary_iface->soft_iface); struct batadv_ogm2_packet *ogm_packet; + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); if (!bat_priv->bat_v.ogm_buff) - return; + goto unlock; ogm_packet = (struct batadv_ogm2_packet *)bat_priv->bat_v.ogm_buff; ether_addr_copy(ogm_packet->orig, primary_iface->net_dev->dev_addr); + +unlock: + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); } /** @@ -1050,6 +1069,8 @@ int batadv_v_ogm_init(struct batadv_priv *bat_priv) atomic_set(&bat_priv->bat_v.ogm_seqno, random_seqno); INIT_DELAYED_WORK(&bat_priv->bat_v.ogm_wq, batadv_v_ogm_send); + mutex_init(&bat_priv->bat_v.ogm_buff_mutex); + return 0; } @@ -1061,7 +1082,11 @@ void batadv_v_ogm_free(struct batadv_priv *bat_priv) { cancel_delayed_work_sync(&bat_priv->bat_v.ogm_wq); + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); + kfree(bat_priv->bat_v.ogm_buff); bat_priv->bat_v.ogm_buff = NULL; bat_priv->bat_v.ogm_buff_len = 0; + + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index be7c02aa91e2..a9fb7b17f557 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include /* for linux/wait.h */ @@ -1539,6 +1540,9 @@ struct batadv_priv_bat_v { /** @ogm_seqno: OGM sequence number - used to identify each OGM */ atomic_t ogm_seqno; + /** @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */ + struct mutex ogm_buff_mutex; + /** @ogm_wq: workqueue used to schedule OGM transmissions */ struct delayed_work ogm_wq; }; -- cgit v1.2.3 From 40e220b4218bb3d278e5e8cc04ccdfd1c7ff8307 Mon Sep 17 
00:00:00 2001 From: Sven Eckelmann Date: Thu, 3 Oct 2019 17:02:01 +0200 Subject: batman-adv: Avoid free/alloc race when handling OGM buffer Each slave interface of an B.A.T.M.A.N. IV virtual interface has an OGM packet buffer which is initialized using data from netdevice notifier and other rtnetlink related hooks. It is sent regularly via various slave interfaces of the batadv virtual interface and in this process also modified (realloced) to integrate additional state information via TVLV containers. It must be avoided that the worker item is executed without a common lock with the netdevice notifier/rtnetlink helpers. Otherwise it can either happen that half modified/freed data is sent out or functions modifying the OGM buffer try to access already freed memory regions. Reported-by: syzbot+0cc629f19ccb8534935b@syzkaller.appspotmail.com Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 61 +++++++++++++++++++++++++++++++++++------ net/batman-adv/hard-interface.c | 2 ++ net/batman-adv/types.h | 3 ++ 3 files changed, 57 insertions(+), 9 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index d78938e3e008..5b0b20e6da95 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -193,14 +195,18 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) unsigned char *ogm_buff; u32 random_seqno; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); atomic_set(&hard_iface->bat_iv.ogm_seqno, random_seqno); hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN; ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC); - if (!ogm_buff) + if (!ogm_buff) { + 
mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); return -ENOMEM; + } hard_iface->bat_iv.ogm_buff = ogm_buff; @@ -212,35 +218,59 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) batadv_ogm_packet->reserved = 0; batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE; + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); + return 0; } static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface) { + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + kfree(hard_iface->bat_iv.ogm_buff); hard_iface->bat_iv.ogm_buff = NULL; + + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; - unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; + void *ogm_buff; - batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + + ogm_buff = hard_iface->bat_iv.ogm_buff; + if (!ogm_buff) + goto unlock; + + batadv_ogm_packet = ogm_buff; ether_addr_copy(batadv_ogm_packet->orig, hard_iface->net_dev->dev_addr); ether_addr_copy(batadv_ogm_packet->prev_sender, hard_iface->net_dev->dev_addr); + +unlock: + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } static void batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; - unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; + void *ogm_buff; - batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + + ogm_buff = hard_iface->bat_iv.ogm_buff; + if (!ogm_buff) + goto unlock; + + batadv_ogm_packet = ogm_buff; batadv_ogm_packet->ttl = BATADV_TTL; + +unlock: + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } /* when do we schedule our own ogm to be sent */ @@ -742,7 +772,11 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) } } -static void batadv_iv_ogm_schedule(struct batadv_hard_iface 
*hard_iface) +/** + * batadv_iv_ogm_schedule_buff() - schedule submission of hardif ogm buffer + * @hard_iface: interface whose ogm buffer should be transmitted + */ +static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff; @@ -753,9 +787,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) u16 tvlv_len = 0; unsigned long send_time; - if (hard_iface->if_status == BATADV_IF_NOT_IN_USE || - hard_iface->if_status == BATADV_IF_TO_BE_REMOVED) - return; + lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex); /* the interface gets activated here to avoid race conditions between * the moment of activating the interface in @@ -823,6 +855,17 @@ out: batadv_hardif_put(primary_if); } +static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) +{ + if (hard_iface->if_status == BATADV_IF_NOT_IN_USE || + hard_iface->if_status == BATADV_IF_TO_BE_REMOVED) + return; + + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + batadv_iv_ogm_schedule_buff(hard_iface); + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); +} + /** * batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over iterface * @orig_node: originator which reproadcasted the OGMs directly diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index c90e47342bb0..afb52282d5bd 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -929,6 +930,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) INIT_LIST_HEAD(&hard_iface->list); INIT_HLIST_HEAD(&hard_iface->neigh_list); + mutex_init(&hard_iface->bat_iv.ogm_buff_mutex); spin_lock_init(&hard_iface->neigh_list_lock); kref_init(&hard_iface->refcount); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 
a9fb7b17f557..4d7f1baee7b7 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -82,6 +82,9 @@ struct batadv_hard_iface_bat_iv { /** @ogm_seqno: OGM sequence number - used to identify each OGM */ atomic_t ogm_seqno; + + /** @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */ + struct mutex ogm_buff_mutex; }; /** -- cgit v1.2.3 From 8b6bc5fd71e677864d1a3b896b3069a6e0c5e214 Mon Sep 17 00:00:00 2001 From: Zhenfang Wang Date: Thu, 12 Sep 2019 13:47:18 +0800 Subject: dmaengine: sprd: Fix the link-list pointer register configuration issue We will set the link-list pointer register to point to the next link-list configuration's physical address, which can load DMA configuration from the link-list node automatically. But the link-list node's physical address can be larger than 32 bits, and now the Spreadtrum DMA driver only supports 32-bit physical addresses, which may cause loading an incorrect DMA configuration when starting the link-list transfer mode. According to the DMA datasheet, we can use SRC_BLK_STEP register (bit28 - bit31) to save the high bits of the link-list node's physical address to fix this issue. 
Fixes: 4ac695464763 ("dmaengine: sprd: Support DMA link-list mode") Signed-off-by: Zhenfang Wang Signed-off-by: Baolin Wang Link: https://lore.kernel.org/r/eadfe9295499efa003e1c344e67e2890f9d1d780.1568267061.git.baolin.wang@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/sprd-dma.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c index 525dc7338fe3..a4a91f233121 100644 --- a/drivers/dma/sprd-dma.c +++ b/drivers/dma/sprd-dma.c @@ -134,6 +134,10 @@ #define SPRD_DMA_SRC_TRSF_STEP_OFFSET 0 #define SPRD_DMA_TRSF_STEP_MASK GENMASK(15, 0) +/* SPRD DMA_SRC_BLK_STEP register definition */ +#define SPRD_DMA_LLIST_HIGH_MASK GENMASK(31, 28) +#define SPRD_DMA_LLIST_HIGH_SHIFT 28 + /* define DMA channel mode & trigger mode mask */ #define SPRD_DMA_CHN_MODE_MASK GENMASK(7, 0) #define SPRD_DMA_TRG_MODE_MASK GENMASK(7, 0) @@ -717,6 +721,7 @@ static int sprd_dma_fill_desc(struct dma_chan *chan, u32 int_mode = flags & SPRD_DMA_INT_MASK; int src_datawidth, dst_datawidth, src_step, dst_step; u32 temp, fix_mode = 0, fix_en = 0; + phys_addr_t llist_ptr; if (dir == DMA_MEM_TO_DEV) { src_step = sprd_dma_get_step(slave_cfg->src_addr_width); @@ -814,13 +819,16 @@ static int sprd_dma_fill_desc(struct dma_chan *chan, * Set the link-list pointer point to next link-list * configuration's physical address. 
*/ - hw->llist_ptr = schan->linklist.phy_addr + temp; + llist_ptr = schan->linklist.phy_addr + temp; + hw->llist_ptr = lower_32_bits(llist_ptr); + hw->src_blk_step = (upper_32_bits(llist_ptr) << SPRD_DMA_LLIST_HIGH_SHIFT) & + SPRD_DMA_LLIST_HIGH_MASK; } else { hw->llist_ptr = 0; + hw->src_blk_step = 0; } hw->frg_step = 0; - hw->src_blk_step = 0; hw->des_blk_step = 0; return 0; } -- cgit v1.2.3 From 9ec691f48b5ef741a48af8932ccaec859c67e8f1 Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Mon, 16 Sep 2019 15:05:13 +0530 Subject: dmaengine: tegra210-adma: fix transfer failure From Tegra186 onwards OUTSTANDING_REQUESTS field is added in channel configuration register (bits 7:4) which defines the maximum number of reads from the source and writes to the destination that may be outstanding at any given point of time. This field must be programmed with a value between 1 and 8. A value of 0 will prevent any transfers from happening. Thus added 'has_outstanding_reqs' bool member in chip data structure and is set to false for Tegra210, since the field is not applicable. For Tegra186 it is set to true and channel configuration is updated with maximum outstanding requests. 
Fixes: 433de642a76c ("dmaengine: tegra210-adma: add support for Tegra186/Tegra194") Cc: stable@vger.kernel.org Signed-off-by: Sameer Pujar Acked-by: Jon Hunter Link: https://lore.kernel.org/r/1568626513-16541-1-git-send-email-spujar@nvidia.com Signed-off-by: Vinod Koul --- drivers/dma/tegra210-adma.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index 5f8adf5c1f20..6e1268552f74 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -40,6 +40,7 @@ #define ADMA_CH_CONFIG_MAX_BURST_SIZE 16 #define ADMA_CH_CONFIG_WEIGHT_FOR_WRR(val) ((val) & 0xf) #define ADMA_CH_CONFIG_MAX_BUFS 8 +#define TEGRA186_ADMA_CH_CONFIG_OUTSTANDING_REQS(reqs) (reqs << 4) #define ADMA_CH_FIFO_CTRL 0x2c #define TEGRA210_ADMA_CH_FIFO_CTRL_TXSIZE(val) (((val) & 0xf) << 8) @@ -77,6 +78,7 @@ struct tegra_adma; * @ch_req_tx_shift: Register offset for AHUB transmit channel select. * @ch_req_rx_shift: Register offset for AHUB receive channel select. * @ch_base_offset: Register offset of DMA channel registers. + * @has_outstanding_reqs: If DMA channel can have outstanding requests. * @ch_fifo_ctrl: Default value for channel FIFO CTRL register. * @ch_req_mask: Mask for Tx or Rx channel select. * @ch_req_max: Maximum number of Tx or Rx channels available. 
@@ -95,6 +97,7 @@ struct tegra_adma_chip_data { unsigned int ch_req_max; unsigned int ch_reg_size; unsigned int nr_channels; + bool has_outstanding_reqs; }; /* @@ -594,6 +597,8 @@ static int tegra_adma_set_xfer_params(struct tegra_adma_chan *tdc, ADMA_CH_CTRL_FLOWCTRL_EN; ch_regs->config |= cdata->adma_get_burst_config(burst_size); ch_regs->config |= ADMA_CH_CONFIG_WEIGHT_FOR_WRR(1); + if (cdata->has_outstanding_reqs) + ch_regs->config |= TEGRA186_ADMA_CH_CONFIG_OUTSTANDING_REQS(8); ch_regs->fifo_ctrl = cdata->ch_fifo_ctrl; ch_regs->tc = desc->period_len & ADMA_CH_TC_COUNT_MASK; @@ -778,6 +783,7 @@ static const struct tegra_adma_chip_data tegra210_chip_data = { .ch_req_tx_shift = 28, .ch_req_rx_shift = 24, .ch_base_offset = 0, + .has_outstanding_reqs = false, .ch_fifo_ctrl = TEGRA210_FIFO_CTRL_DEFAULT, .ch_req_mask = 0xf, .ch_req_max = 10, @@ -792,6 +798,7 @@ static const struct tegra_adma_chip_data tegra186_chip_data = { .ch_req_tx_shift = 27, .ch_req_rx_shift = 22, .ch_base_offset = 0x10000, + .has_outstanding_reqs = true, .ch_fifo_ctrl = TEGRA186_FIFO_CTRL_DEFAULT, .ch_req_mask = 0x1f, .ch_req_max = 20, -- cgit v1.2.3 From bd73dfabdda280fc5f05bdec79b6721b4b2f035f Mon Sep 17 00:00:00 2001 From: Robin Gong Date: Tue, 24 Sep 2019 09:49:18 +0000 Subject: dmaengine: imx-sdma: fix size check for sdma script_number Illegal memory will be touch if SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3 (41) exceed the size of structure sdma_script_start_addrs(40), thus cause memory corrupt such as slob block header so that kernel trap into while() loop forever in slob_free(). Please refer to below code piece in imx-sdma.c: for (i = 0; i < sdma->script_number; i++) if (addr_arr[i] > 0) saddr_arr[i] = addr_arr[i]; /* memory corrupt here */ That issue was brought by commit a572460be9cf ("dmaengine: imx-sdma: Add support for version 3 firmware") because SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3 (38->41 3 scripts added) not align with script number added in sdma_script_start_addrs(2 scripts). 
Fixes: a572460be9cf ("dmaengine: imx-sdma: Add support for version 3 firmware") Cc: stable@vger.kernel Link: https://www.spinics.net/lists/arm-kernel/msg754895.html Signed-off-by: Robin Gong Reported-by: Jurgen Lambrecht Link: https://lore.kernel.org/r/1569347584-3478-1-git-send-email-yibin.gong@nxp.com [vkoul: update the patch title] Signed-off-by: Vinod Koul --- drivers/dma/imx-sdma.c | 8 ++++++++ include/linux/platform_data/dma-imx-sdma.h | 3 +++ 2 files changed, 11 insertions(+) diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 9ba74ab7e912..c27e206a764c 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1707,6 +1707,14 @@ static void sdma_add_scripts(struct sdma_engine *sdma, if (!sdma->script_number) sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1; + if (sdma->script_number > sizeof(struct sdma_script_start_addrs) + / sizeof(s32)) { + dev_err(sdma->dev, + "SDMA script number %d not match with firmware.\n", + sdma->script_number); + return; + } + for (i = 0; i < sdma->script_number; i++) if (addr_arr[i] > 0) saddr_arr[i] = addr_arr[i]; diff --git a/include/linux/platform_data/dma-imx-sdma.h b/include/linux/platform_data/dma-imx-sdma.h index 6eaa53cef0bd..30e676b36b24 100644 --- a/include/linux/platform_data/dma-imx-sdma.h +++ b/include/linux/platform_data/dma-imx-sdma.h @@ -51,7 +51,10 @@ struct sdma_script_start_addrs { /* End of v2 array */ s32 zcanfd_2_mcu_addr; s32 zqspi_2_mcu_addr; + s32 mcu_2_ecspi_addr; /* End of v3 array */ + s32 mcu_2_zqspi_addr; + /* End of v4 array */ }; /** -- cgit v1.2.3 From 112e72373d1f60f1e4558d0a7f0de5da39a1224d Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 11 Oct 2019 14:18:26 -0400 Subject: virtio-fs: Change module name to virtiofs.ko We have been calling it virtio_fs and even file name is virtio_fs.c. Module name is virtio_fs.ko but when registering file system user is supposed to specify filesystem type as "virtiofs". 
Masayoshi Mizuma reported that he specified filesystem type as "virtio_fs" and got this warning on console. ------------[ cut here ]------------ request_module fs-virtio_fs succeeded, but still no fs? WARNING: CPU: 1 PID: 1234 at fs/filesystems.c:274 get_fs_type+0x12c/0x138 Modules linked in: ... virtio_fs fuse virtio_net net_failover ... CPU: 1 PID: 1234 Comm: mount Not tainted 5.4.0-rc1 #1 So looks like kernel could find the module virtio_fs.ko but could not find filesystem type after that. It probably is better to rename module name to virtiofs.ko so that above warning goes away in case user ends up specifying wrong fs name. Reported-by: Masayoshi Mizuma Suggested-by: Stefan Hajnoczi Signed-off-by: Vivek Goyal Tested-by: Masayoshi Mizuma Reviewed-by: Stefan Hajnoczi Signed-off-by: Miklos Szeredi --- fs/fuse/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index 6419a2b3510d..3e8cebfb59b7 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_FUSE_FS) += fuse.o obj-$(CONFIG_CUSE) += cuse.o -obj-$(CONFIG_VIRTIO_FS) += virtio_fs.o +obj-$(CONFIG_VIRTIO_FS) += virtiofs.o fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o +virtiofs-y += virtio_fs.o -- cgit v1.2.3 From 9e8acd9c44a0dd52b2922eeb82398c04e356c058 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Wed, 9 Oct 2019 10:31:24 +0200 Subject: bpf: lwtunnel: Fix reroute supplying invalid dst The dst in bpf_input() has lwtstate field set. As it is of the LWTUNNEL_ENCAP_BPF type, lwtstate->data is struct bpf_lwt. When the bpf program returns BPF_LWT_REROUTE, ip_route_input_noref is directly called on this skb. This causes invalid memory access, as ip_route_input_slow calls skb_tunnel_info(skb) that expects the dst->lwtstate->data to be struct ip_tunnel_info. This results in struct bpf_lwt being accessed as struct ip_tunnel_info. Drop the dst before calling the IP route input functions (both for IPv4 and IPv6). 
Reported by KASAN. Fixes: 3bd0b15281af ("bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c") Signed-off-by: Jiri Benc Signed-off-by: Alexei Starovoitov Acked-by: Peter Oskolkov Link: https://lore.kernel.org/bpf/111664d58fe4e9dd9c8014bb3d0b2dab93086a9e.1570609794.git.jbenc@redhat.com --- net/core/lwt_bpf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index f93785e5833c..74cfb8b5ab33 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -88,11 +88,16 @@ static int bpf_lwt_input_reroute(struct sk_buff *skb) int err = -EINVAL; if (skb->protocol == htons(ETH_P_IP)) { + struct net_device *dev = skb_dst(skb)->dev; struct iphdr *iph = ip_hdr(skb); + dev_hold(dev); + skb_dst_drop(skb); err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb_dst(skb)->dev); + iph->tos, dev); + dev_put(dev); } else if (skb->protocol == htons(ETH_P_IPV6)) { + skb_dst_drop(skb); err = ipv6_stub->ipv6_route_input(skb); } else { err = -EAFNOSUPPORT; -- cgit v1.2.3 From 68fe2b520cee829ed518b4b1f64d2a557bcbffe1 Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Thu, 26 Sep 2019 16:20:57 +0530 Subject: dmaengine: xilinx_dma: Fix 64-bit simple AXIDMA transfer In AXI DMA simple mode also pass MSB bits of source and destination address to xilinx_write function. It fixes simple AXI DMA operation mode using 64-bit addressing. 
Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1569495060-18117-2-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index e7dc3c4dc8e0..1fbe0258578b 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -1354,7 +1354,8 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan) node); hw = &segment->hw; - xilinx_write(chan, XILINX_DMA_REG_SRCDSTADDR, hw->buf_addr); + xilinx_write(chan, XILINX_DMA_REG_SRCDSTADDR, + xilinx_prep_dma_addr_t(hw->buf_addr)); /* Start the transfer */ dma_ctrl_write(chan, XILINX_DMA_REG_BTT, -- cgit v1.2.3 From 6c6de1ddb1be3840f2ed5cc9d009a622720940c9 Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Thu, 26 Sep 2019 16:20:58 +0530 Subject: dmaengine: xilinx_dma: Fix control reg update in vdma_channel_set_config In vdma_channel_set_config clear the delay, frame count and master mask before updating their new values. It avoids programming incorrect state when input parameters are different from default. 
Signed-off-by: Radhey Shyam Pandey Acked-by: Appana Durga Kedareswara rao Signed-off-by: Michal Simek Link: https://lore.kernel.org/r/1569495060-18117-3-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 1fbe0258578b..5d56f1e4d332 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -68,6 +68,9 @@ #define XILINX_DMA_DMACR_CIRC_EN BIT(1) #define XILINX_DMA_DMACR_RUNSTOP BIT(0) #define XILINX_DMA_DMACR_FSYNCSRC_MASK GENMASK(6, 5) +#define XILINX_DMA_DMACR_DELAY_MASK GENMASK(31, 24) +#define XILINX_DMA_DMACR_FRAME_COUNT_MASK GENMASK(23, 16) +#define XILINX_DMA_DMACR_MASTER_MASK GENMASK(11, 8) #define XILINX_DMA_REG_DMASR 0x0004 #define XILINX_DMA_DMASR_EOL_LATE_ERR BIT(15) @@ -2118,8 +2121,10 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan, chan->config.gen_lock = cfg->gen_lock; chan->config.master = cfg->master; + dmacr &= ~XILINX_DMA_DMACR_GENLOCK_EN; if (cfg->gen_lock && chan->genlock) { dmacr |= XILINX_DMA_DMACR_GENLOCK_EN; + dmacr &= ~XILINX_DMA_DMACR_MASTER_MASK; dmacr |= cfg->master << XILINX_DMA_DMACR_MASTER_SHIFT; } @@ -2135,11 +2140,13 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan, chan->config.delay = cfg->delay; if (cfg->coalesc <= XILINX_DMA_DMACR_FRAME_COUNT_MAX) { + dmacr &= ~XILINX_DMA_DMACR_FRAME_COUNT_MASK; dmacr |= cfg->coalesc << XILINX_DMA_DMACR_FRAME_COUNT_SHIFT; chan->config.coalesc = cfg->coalesc; } if (cfg->delay <= XILINX_DMA_DMACR_DELAY_MAX) { + dmacr &= ~XILINX_DMA_DMACR_DELAY_MASK; dmacr |= cfg->delay << XILINX_DMA_DMACR_DELAY_SHIFT; chan->config.delay = cfg->delay; } -- cgit v1.2.3 From ec1ac309596a7bdf206743b092748205f6cd5720 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 9 Oct 2019 17:11:30 +0800 Subject: dmaengine: sprd: Fix the possible memory leak issue If we terminate the channel to 
free all descriptors associated with this channel, we will leak the memory of the current descriptor if the current descriptor is not completed, since it had been deleted from the desc_issued list and has not been added into the desc_completed list. Thus we should check if current descriptor is completed or not, when freeing the descriptors associated with one channel, if not, we should free it to avoid this issue. Fixes: 9b3b8171f7f4 ("dmaengine: sprd: Add Spreadtrum DMA driver") Reported-by: Zhenfang Wang Tested-by: Zhenfang Wang Signed-off-by: Baolin Wang Link: https://lore.kernel.org/r/170dbbc6d5366b6fa974ce2d366652e23a334251.1570609788.git.baolin.wang@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/sprd-dma.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c index a4a91f233121..8546ad034720 100644 --- a/drivers/dma/sprd-dma.c +++ b/drivers/dma/sprd-dma.c @@ -212,6 +212,7 @@ struct sprd_dma_dev { struct sprd_dma_chn channels[0]; }; +static void sprd_dma_free_desc(struct virt_dma_desc *vd); static bool sprd_dma_filter_fn(struct dma_chan *chan, void *param); static struct of_dma_filter_info sprd_dma_info = { .filter_fn = sprd_dma_filter_fn, @@ -613,12 +614,19 @@ static int sprd_dma_alloc_chan_resources(struct dma_chan *chan) static void sprd_dma_free_chan_resources(struct dma_chan *chan) { struct sprd_dma_chn *schan = to_sprd_dma_chan(chan); + struct virt_dma_desc *cur_vd = NULL; unsigned long flags; spin_lock_irqsave(&schan->vc.lock, flags); + if (schan->cur_desc) + cur_vd = &schan->cur_desc->vd; + sprd_dma_stop(schan); spin_unlock_irqrestore(&schan->vc.lock, flags); + if (cur_vd) + sprd_dma_free_desc(cur_vd); + vchan_free_chan_resources(&schan->vc); pm_runtime_put(chan->device->dev); } @@ -1031,15 +1039,22 @@ static int sprd_dma_resume(struct dma_chan *chan) static int sprd_dma_terminate_all(struct dma_chan *chan) { struct sprd_dma_chn *schan = to_sprd_dma_chan(chan); + struct virt_dma_desc 
*cur_vd = NULL; unsigned long flags; LIST_HEAD(head); spin_lock_irqsave(&schan->vc.lock, flags); + if (schan->cur_desc) + cur_vd = &schan->cur_desc->vd; + sprd_dma_stop(schan); vchan_get_all_descriptors(&schan->vc, &head); spin_unlock_irqrestore(&schan->vc.lock, flags); + if (cur_vd) + sprd_dma_free_desc(cur_vd); + vchan_dma_desc_free_list(&schan->vc, &head); return 0; } -- cgit v1.2.3 From 52eb063d153ac310058fbaa91577a72c0e7a7169 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Thu, 3 Oct 2019 12:13:54 +0200 Subject: soundwire: depend on ACPI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The device cannot be probed on !ACPI and gives this warning: drivers/soundwire/slave.c:16:12: warning: ‘sdw_slave_add’ defined but not used [-Wunused-function] static int sdw_slave_add(struct sdw_bus *bus, ^~~~~~~~~~~~~ Cc: stable@vger.kernel.org Fixes: 7c3cd189b86d ("soundwire: Add Master registration") Signed-off-by: Michal Suchanek Link: https://lore.kernel.org/r/bd685232ea511251eeb9554172f1524eabf9a46e.1570097621.git.msuchanek@suse.de Signed-off-by: Vinod Koul --- drivers/soundwire/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soundwire/Kconfig b/drivers/soundwire/Kconfig index f518273cfbe3..c73bfbaa2659 100644 --- a/drivers/soundwire/Kconfig +++ b/drivers/soundwire/Kconfig @@ -5,6 +5,7 @@ menuconfig SOUNDWIRE tristate "SoundWire support" + depends on ACPI help SoundWire is a 2-Pin interface with data and clock line ratified by the MIPI Alliance. SoundWire is used for transporting data -- cgit v1.2.3 From 0f8c0f8a7782178c40157b2feb6a532493cbadd3 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Thu, 3 Oct 2019 12:13:55 +0200 Subject: soundwire: depend on ACPI || OF Now devicetree is supported for probing soundwire as well. On platforms built with !ACPI !OF (ie s390x) the device still cannot be probed and gives a build warning. 
Cc: stable@vger.kernel.org Fixes: a2e484585ad3 ("soundwire: core: add device tree support for slave devices") Signed-off-by: Michal Suchanek Link: https://lore.kernel.org/r/0b89b4ea16a93f523105c81a2f718b0cd7ec66f2.1570097621.git.msuchanek@suse.de Signed-off-by: Vinod Koul --- drivers/soundwire/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soundwire/Kconfig b/drivers/soundwire/Kconfig index c73bfbaa2659..c8c80df090d1 100644 --- a/drivers/soundwire/Kconfig +++ b/drivers/soundwire/Kconfig @@ -5,7 +5,7 @@ menuconfig SOUNDWIRE tristate "SoundWire support" - depends on ACPI + depends on ACPI || OF help SoundWire is a 2-Pin interface with data and clock line ratified by the MIPI Alliance. SoundWire is used for transporting data -- cgit v1.2.3 From 3f22c7467136adfa6d2a7baf7cd5c573f0641bd1 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 15 Oct 2019 16:11:41 +0200 Subject: virtio-fs: don't show mount options Virtio-fs does not accept any mount options, so it's confusing and wrong to show any in /proc/mounts. 
Reported-by: Stefan Hajnoczi Signed-off-by: Miklos Szeredi --- fs/fuse/fuse_i.h | 4 ++++ fs/fuse/inode.c | 4 ++++ fs/fuse/virtio_fs.c | 1 + 3 files changed, 9 insertions(+) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 956aeaf961ae..d148188cfca4 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -479,6 +479,7 @@ struct fuse_fs_context { bool destroy:1; bool no_control:1; bool no_force_umount:1; + bool no_mount_options:1; unsigned int max_read; unsigned int blksize; const char *subtype; @@ -713,6 +714,9 @@ struct fuse_conn { /** Do not allow MNT_FORCE umount */ unsigned int no_force_umount:1; + /* Do not show mount options */ + unsigned int no_mount_options:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e040e2a2b621..16aec32f7f3d 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -558,6 +558,9 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) struct super_block *sb = root->d_sb; struct fuse_conn *fc = get_fuse_conn_super(sb); + if (fc->no_mount_options) + return 0; + seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id)); seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id)); if (fc->default_permissions) @@ -1180,6 +1183,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) fc->destroy = ctx->destroy; fc->no_control = ctx->no_control; fc->no_force_umount = ctx->no_force_umount; + fc->no_mount_options = ctx->no_mount_options; err = -ENOMEM; root = fuse_get_root_inode(sb, ctx->rootmode); diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 6af3f131e468..e22a0c003c3d 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -992,6 +992,7 @@ static int virtio_fs_fill_super(struct super_block *sb) .destroy = true, .no_control = true, .no_force_umount = true, + .no_mount_options = true, }; mutex_lock(&virtio_fs_mutex); -- cgit v1.2.3 From 
16ff7bf6dbcc6f77d2eec1ac9120edf44213c2f1 Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Wed, 16 Oct 2019 08:15:59 +0800 Subject: HID: intel-ish-hid: fix wrong error handling in ishtp_cl_alloc_tx_ring() When allocating tx ring buffers fails, the tx buffers should be freed, not the rx buffers. Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp/client-buffers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-ish-hid/ishtp/client-buffers.c b/drivers/hid/intel-ish-hid/ishtp/client-buffers.c index 1b0a0cc605e7..513d7a4a1b8a 100644 --- a/drivers/hid/intel-ish-hid/ishtp/client-buffers.c +++ b/drivers/hid/intel-ish-hid/ishtp/client-buffers.c @@ -84,7 +84,7 @@ int ishtp_cl_alloc_tx_ring(struct ishtp_cl *cl) return 0; out: dev_err(&cl->device->dev, "error in allocating Tx pool\n"); - ishtp_cl_free_rx_ring(cl); + ishtp_cl_free_tx_ring(cl); return -ENOMEM; } -- cgit v1.2.3 From 41d49e7939de5ec532d86494185b2ca2e99c848a Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Thu, 19 Sep 2019 21:13:15 +0200 Subject: clocksource/drivers/mediatek: Fix error handling When timer_of_init fails, it cleans up after itself by undoing everything it did during the initialization function. mtk_syst_init and mtk_gpt_init both call timer_of_cleanup if timer_of_init fails. timer_of_cleanup tries to release the resources taken. 
Since these resources have already been cleaned up by timer_of_init, we end up getting a few warnings printed: [ 0.001935] WARNING: CPU: 0 PID: 0 at __clk_put+0xe8/0x128 [ 0.002650] Modules linked in: [ 0.003058] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.67+ #1 [ 0.003852] Hardware name: MediaTek MT8183 (DT) [ 0.004446] pstate: 20400085 (nzCv daIf +PAN -UAO) [ 0.005073] pc : __clk_put+0xe8/0x128 [ 0.005555] lr : clk_put+0xc/0x14 [ 0.005988] sp : ffffff80090b3ea0 [ 0.006422] x29: ffffff80090b3ea0 x28: 0000000040e20018 [ 0.007121] x27: ffffffc07bfff780 x26: 0000000000000001 [ 0.007819] x25: ffffff80090bda80 x24: ffffff8008ec5828 [ 0.008517] x23: ffffff80090bd000 x22: ffffff8008d8b2e8 [ 0.009216] x21: 0000000000000001 x20: fffffffffffffdfb [ 0.009914] x19: ffffff8009166180 x18: 00000000002bffa8 [ 0.010612] x17: ffffffc012996980 x16: 0000000000000000 [ 0.011311] x15: ffffffbf004a6800 x14: 3536343038393334 [ 0.012009] x13: 2079726576652073 x12: 7eb9c62c5c38f100 [ 0.012707] x11: ffffff80090b3ba0 x10: ffffff80090b3ba0 [ 0.013405] x9 : 0000000000000004 x8 : 0000000000000040 [ 0.014103] x7 : ffffffc079400270 x6 : 0000000000000000 [ 0.014801] x5 : ffffffc079400248 x4 : 0000000000000000 [ 0.015499] x3 : 0000000000000000 x2 : 0000000000000000 [ 0.016197] x1 : ffffff80091661c0 x0 : fffffffffffffdfb [ 0.016896] Call trace: [ 0.017218] __clk_put+0xe8/0x128 [ 0.017654] clk_put+0xc/0x14 [ 0.018048] timer_of_cleanup+0x60/0x7c [ 0.018551] mtk_syst_init+0x8c/0x9c [ 0.019020] timer_probe+0x6c/0xe0 [ 0.019469] time_init+0x14/0x44 [ 0.019893] start_kernel+0x2d0/0x46c [ 0.020378] ---[ end trace 8c1efabea1267649 ]--- [ 0.020982] ------------[ cut here ]------------ [ 0.021586] Trying to vfree() nonexistent vm area ((____ptrval____)) [ 0.022427] WARNING: CPU: 0 PID: 0 at __vunmap+0xd0/0xd8 [ 0.023119] Modules linked in: [ 0.023524] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 4.19.67+ #1 [ 0.024498] Hardware name: MediaTek MT8183 (DT) [ 0.025091] pstate: 60400085 (nZCv daIf +PAN -UAO) [ 
0.025718] pc : __vunmap+0xd0/0xd8 [ 0.026176] lr : __vunmap+0xd0/0xd8 [ 0.026632] sp : ffffff80090b3e90 [ 0.027066] x29: ffffff80090b3e90 x28: 0000000040e20018 [ 0.027764] x27: ffffffc07bfff780 x26: 0000000000000001 [ 0.028462] x25: ffffff80090bda80 x24: ffffff8008ec5828 [ 0.029160] x23: ffffff80090bd000 x22: ffffff8008d8b2e8 [ 0.029858] x21: 0000000000000000 x20: 0000000000000000 [ 0.030556] x19: ffffff800800d000 x18: 00000000002bffa8 [ 0.031254] x17: 0000000000000000 x16: 0000000000000000 [ 0.031952] x15: ffffffbf004a6800 x14: 3536343038393334 [ 0.032651] x13: 2079726576652073 x12: 7eb9c62c5c38f100 [ 0.033349] x11: ffffff80090b3b40 x10: ffffff80090b3b40 [ 0.034047] x9 : 0000000000000005 x8 : 5f5f6c6176727470 [ 0.034745] x7 : 5f5f5f5f28282061 x6 : ffffff80091c86ef [ 0.035443] x5 : ffffff800852b690 x4 : 0000000000000000 [ 0.036141] x3 : 0000000000000002 x2 : 0000000000000002 [ 0.036839] x1 : 7eb9c62c5c38f100 x0 : 7eb9c62c5c38f100 [ 0.037536] Call trace: [ 0.037859] __vunmap+0xd0/0xd8 [ 0.038271] vunmap+0x24/0x30 [ 0.038664] __iounmap+0x2c/0x34 [ 0.039088] timer_of_cleanup+0x70/0x7c [ 0.039591] mtk_syst_init+0x8c/0x9c [ 0.040060] timer_probe+0x6c/0xe0 [ 0.040507] time_init+0x14/0x44 [ 0.040932] start_kernel+0x2d0/0x46c This commit remove the calls to timer_of_cleanup when timer_of_init fails since it is unnecessary and actually cause warnings to be printed. 
Fixes: a0858f937960 ("mediatek: Convert the driver to timer-of") Signed-off-by: Fabien Parent Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/linux-arm-kernel/20190919191315.25190-1-fparent@baylibre.com/ --- drivers/clocksource/timer-mediatek.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/clocksource/timer-mediatek.c b/drivers/clocksource/timer-mediatek.c index a562f491b0f8..9318edcd8963 100644 --- a/drivers/clocksource/timer-mediatek.c +++ b/drivers/clocksource/timer-mediatek.c @@ -268,15 +268,12 @@ static int __init mtk_syst_init(struct device_node *node) ret = timer_of_init(node, &to); if (ret) - goto err; + return ret; clockevents_config_and_register(&to.clkevt, timer_of_rate(&to), TIMER_SYNC_TICKS, 0xffffffff); return 0; -err: - timer_of_cleanup(&to); - return ret; } static int __init mtk_gpt_init(struct device_node *node) @@ -293,7 +290,7 @@ static int __init mtk_gpt_init(struct device_node *node) ret = timer_of_init(node, &to); if (ret) - goto err; + return ret; /* Configure clock source */ mtk_gpt_setup(&to, TIMER_CLK_SRC, GPT_CTRL_OP_FREERUN); @@ -311,9 +308,6 @@ static int __init mtk_gpt_init(struct device_node *node) mtk_gpt_enable_irq(&to, TIMER_CLK_EVT); return 0; -err: - timer_of_cleanup(&to); - return ret; } TIMER_OF_DECLARE(mtk_mt6577, "mediatek,mt6577-timer", mtk_gpt_init); TIMER_OF_DECLARE(mtk_mt6765, "mediatek,mt6765-timer", mtk_syst_init); -- cgit v1.2.3 From d10f60ae27d26d811e2a1bb39ded47df96d7499f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 14 Oct 2019 16:51:28 +0000 Subject: powerpc/32s: fix allow/prevent_user_access() when crossing segment boundaries. Make sure starting addr is aligned to segment boundary so that when incrementing the segment, the starting address of the new segment is below the end address. Otherwise the last segment might get missed. 
Fixes: a68c31fc01ef ("powerpc/32s: Implement Kernel Userspace Access Protection") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/067a1b09f15f421d40797c2d04c22d4049a1cee8.1571071875.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/book3s/32/kup.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 677e9babef80..f9dc597b0b86 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -91,6 +91,7 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) { + addr &= 0xf0000000; /* align addr to start of segment */ barrier(); /* make sure thread.kuap is updated before playing with SRs */ while (addr < end) { mtsrin(sr, addr); -- cgit v1.2.3 From c8973df2da677f375f8b12b6eefca2f44c8884d5 Mon Sep 17 00:00:00 2001 From: Rafi Wiener Date: Wed, 2 Oct 2019 15:02:43 +0300 Subject: RDMA/mlx5: Clear old rate limit when closing QP Before QP is closed it changes to ERROR state, when this happens the QP was left with old rate limit that was already removed from the table. 
Fixes: 7d29f349a4b9 ("IB/mlx5: Properly adjust rate limit on QP state transitions") Signed-off-by: Rafi Wiener Signed-off-by: Oleg Kuporosov Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20191002120243.16971-1-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8937d72ddcf6..5fd071c05944 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3249,10 +3249,12 @@ static int modify_raw_packet_qp_sq( } /* Only remove the old rate after new rate was set */ - if ((old_rl.rate && - !mlx5_rl_are_equal(&old_rl, &new_rl)) || - (new_state != MLX5_SQC_STATE_RDY)) + if ((old_rl.rate && !mlx5_rl_are_equal(&old_rl, &new_rl)) || + (new_state != MLX5_SQC_STATE_RDY)) { mlx5_rl_remove_rate(dev, &old_rl); + if (new_state != MLX5_SQC_STATE_RDY) + memset(&new_rl, 0, sizeof(new_rl)); + } ibqp->rl = new_rl; sq->state = new_state; -- cgit v1.2.3 From 9ed5bd7d22241ad232fd3a5be404e83eb6cadc04 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Fri, 4 Oct 2019 16:40:35 -0400 Subject: IB/hfi1: Avoid excessive retry for TID RDMA READ request A TID RDMA READ request could be retried under one of the following conditions: - The RC retry timer expires; - A later TID RDMA READ RESP packet is received before the next expected one. For the latter, under normal conditions, the PSN in IB space is used for comparison. More specifically, the IB PSN in the incoming TID RDMA READ RESP packet is compared with the last IB PSN of a given TID RDMA READ request to determine if the request should be retried. This is similar to the retry logic for a normal RDMA READ request. However, if a TID RDMA READ RESP packet is lost due to congestion, header suppression will be disabled and each incoming packet will raise an interrupt until the hardware flow is reloaded. 
Under this condition, each packet KDETH PSN will be checked by software against r_next_psn and a retry will be requested if the packet KDETH PSN is later than r_next_psn. Since each TID RDMA READ segment could have up to 64 packets and each TID RDMA READ request could have many segments, we could make far more retries under such conditions, and thus leading to RETRY_EXC_ERR status. This patch fixes the issue by removing the retry when the incoming packet KDETH PSN is later than r_next_psn. Instead, it resorts to RC timer and normal IB PSN comparison for any request retry. Fixes: 9905bf06e890 ("IB/hfi1: Add functions to receive TID RDMA READ response") Cc: Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/20191004204035.26542.41684.stgit@awfm-01.aw.intel.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/tid_rdma.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index b4dcc4d29f84..f21fca3617d5 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -2736,11 +2736,6 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, diff = cmp_psn(psn, flow->flow_state.r_next_psn); if (diff > 0) { - if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) - restart_tid_rdma_read_req(rcd, - qp, - wqe); - /* Drop the packet.*/ goto s_unlock; } else if (diff < 0) { -- cgit v1.2.3 From 22bb13653410424d9fce8d447506a41f8292f22f Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 4 Oct 2019 16:49:34 -0400 Subject: IB/hfi1: Use a common pad buffer for 9B and 16B packets There is no reason for a different pad buffer for the two packet types. Expand the current buffer allocation to allow for both packet types. 
Fixes: f8195f3b14a0 ("IB/hfi1: Eliminate allocation while atomic") Reported-by: Dan Carpenter Reviewed-by: Kaike Wan Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/20191004204934.26838.13099.stgit@awfm-01.aw.intel.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/sdma.c | 5 +++-- drivers/infiniband/hw/hfi1/verbs.c | 10 ++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 2ed7bfd5feea..c61b6022575e 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -65,6 +65,7 @@ #define SDMA_DESCQ_CNT 2048 #define SDMA_DESC_INTR 64 #define INVALID_TAIL 0xffff +#define SDMA_PAD max_t(size_t, MAX_16B_PADDING, sizeof(u32)) static uint sdma_descq_cnt = SDMA_DESCQ_CNT; module_param(sdma_descq_cnt, uint, S_IRUGO); @@ -1296,7 +1297,7 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) struct sdma_engine *sde; if (dd->sdma_pad_dma) { - dma_free_coherent(&dd->pcidev->dev, 4, + dma_free_coherent(&dd->pcidev->dev, SDMA_PAD, (void *)dd->sdma_pad_dma, dd->sdma_pad_phys); dd->sdma_pad_dma = NULL; @@ -1491,7 +1492,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } /* Allocate memory for pad */ - dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, sizeof(u32), + dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, SDMA_PAD, &dd->sdma_pad_phys, GFP_KERNEL); if (!dd->sdma_pad_dma) { dd_dev_err(dd, "failed to allocate SendDMA pad memory\n"); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 7bff0a1e713d..089e201d7550 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -147,9 +147,6 @@ static int pio_wait(struct rvt_qp *qp, /* Length of buffer to create verbs txreq cache name */ #define TXREQ_NAME_LEN 24 -/* 16B trailing buffer */ -static const u8 
trail_buf[MAX_16B_PADDING]; - static uint wss_threshold = 80; module_param(wss_threshold, uint, S_IRUGO); MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy"); @@ -820,8 +817,8 @@ static int build_verbs_tx_desc( /* add icrc, lt byte, and padding to flit */ if (extra_bytes) - ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq, - (void *)trail_buf, extra_bytes); + ret = sdma_txadd_daddr(sde->dd, &tx->txreq, + sde->dd->sdma_pad_phys, extra_bytes); bail_txadd: return ret; @@ -1089,7 +1086,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, } /* add icrc, lt byte, and padding to flit */ if (extra_bytes) - seg_pio_copy_mid(pbuf, trail_buf, extra_bytes); + seg_pio_copy_mid(pbuf, ppd->dd->sdma_pad_dma, + extra_bytes); seg_pio_copy_end(pbuf); } -- cgit v1.2.3 From 7693de9f7aa4e2993fbd7094863304be6a4bbe16 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 16 Oct 2019 16:30:03 +0200 Subject: clocksource/drivers/sh_mtu2: Do not loop using platform_get_irq_by_name() As platform_get_irq_by_name() now prints an error when the interrupt does not exist, looping over possibly non-existing interrupts causes the printing of scary messages like: sh_mtu2 fcff0000.timer: IRQ tgi1a not found sh_mtu2 fcff0000.timer: IRQ tgi2a not found Fix this by using the platform_irq_count() helper, to avoid touching non-existent interrupts. Limit the returned number of interrupts to the maximum number of channels currently supported by the driver in a future-proof way, i.e. using ARRAY_SIZE() instead of a hardcoded number. 
Fixes: 7723f4c5ecdb8d83 ("driver core: platform: Add an error message to platform_get_irq*()") Signed-off-by: Geert Uytterhoeven Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20191016143003.28561-1-geert+renesas@glider.be --- drivers/clocksource/sh_mtu2.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c index 354b27d14a19..62812f80b5cc 100644 --- a/drivers/clocksource/sh_mtu2.c +++ b/drivers/clocksource/sh_mtu2.c @@ -328,12 +328,13 @@ static int sh_mtu2_register(struct sh_mtu2_channel *ch, const char *name) return 0; } +static const unsigned int sh_mtu2_channel_offsets[] = { + 0x300, 0x380, 0x000, +}; + static int sh_mtu2_setup_channel(struct sh_mtu2_channel *ch, unsigned int index, struct sh_mtu2_device *mtu) { - static const unsigned int channel_offsets[] = { - 0x300, 0x380, 0x000, - }; char name[6]; int irq; int ret; @@ -356,7 +357,7 @@ static int sh_mtu2_setup_channel(struct sh_mtu2_channel *ch, unsigned int index, return ret; } - ch->base = mtu->mapbase + channel_offsets[index]; + ch->base = mtu->mapbase + sh_mtu2_channel_offsets[index]; ch->index = index; return sh_mtu2_register(ch, dev_name(&mtu->pdev->dev)); @@ -408,7 +409,12 @@ static int sh_mtu2_setup(struct sh_mtu2_device *mtu, } /* Allocate and setup the channels. */ - mtu->num_channels = 3; + ret = platform_irq_count(pdev); + if (ret < 0) + goto err_unmap; + + mtu->num_channels = min_t(unsigned int, ret, + ARRAY_SIZE(sh_mtu2_channel_offsets)); mtu->channels = kcalloc(mtu->num_channels, sizeof(*mtu->channels), GFP_KERNEL); -- cgit v1.2.3 From 7667819385457b4aeb5fac94f67f52ab52cc10d5 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Thu, 17 Oct 2019 08:26:06 -0700 Subject: dmaengine: qcom: bam_dma: Fix resource leak bam_dma_terminate_all() will leak resources if any of the transactions are committed to the hardware (present in the desc fifo), and not complete. 
Since bam_dma_terminate_all() does not cause the hardware to be updated, the hardware will still operate on any previously committed transactions. This can cause memory corruption if the memory for the transaction has been reassigned, and will cause a sync issue between the BAM and its client(s). Fix this by properly updating the hardware in bam_dma_terminate_all(). Fixes: e7c0fe2a5c84 ("dmaengine: add Qualcomm BAM dma driver") Signed-off-by: Jeffrey Hugo Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191017152606.34120-1-jeffrey.l.hugo@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/qcom/bam_dma.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index 8e90a405939d..ef73f65224b1 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -694,6 +694,25 @@ static int bam_dma_terminate_all(struct dma_chan *chan) /* remove all transactions, including active transaction */ spin_lock_irqsave(&bchan->vc.lock, flag); + /* + * If we have transactions queued, then some might be committed to the + * hardware in the desc fifo. The only way to reset the desc fifo is + * to do a hardware reset (either by pipe or the entire block). + * bam_chan_init_hw() will trigger a pipe reset, and also reinit the + * pipe. If the pipe is left disabled (default state after pipe reset) + * and is accessed by a connected hardware engine, a fatal error in + * the BAM will occur. There is a small window where this could happen + * with bam_chan_init_hw(), but it is assumed that the caller has + * stopped activity on any attached hardware engine. Make sure to do + * this first so that the BAM hardware doesn't cause memory corruption + * by accessing freed resources. 
+ */ + if (!list_empty(&bchan->desc_list)) { + async_desc = list_first_entry(&bchan->desc_list, + struct bam_async_desc, desc_node); + bam_chan_init_hw(bchan, async_desc->dir); + } + list_for_each_entry_safe(async_desc, tmp, &bchan->desc_list, desc_node) { list_add(&async_desc->vd.node, &bchan->vc.desc_issued); -- cgit v1.2.3 From 67b18dfb8cfc6d6c2f45ba8c546088f5c14f5bd5 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 16 Oct 2019 23:12:24 +0800 Subject: HID: i2c-hid: Remove runtime power management Runtime power management in i2c-hid brings lots of issues, such as: - When transitioning from display manager to desktop session, i2c-hid was closed and opened, so the device was set to SLEEP and ON in a short period. Vendors confirmed that their devices can't handle fast ON/SLEEP command because Windows doesn't have this behavior. - When rebooting, i2c-hid was closed, and the driver core put the device back to full power before shutdown. This behavior also triggers a quick SLEEP and ON commands that some devices can't handle, renders an unusable touchpad after reboot. - Most importantly, my power meter reports little to none energy saving when i2c-hid is runtime suspended. So let's remove runtime power management since there is no actual benefit. 
Signed-off-by: Kai-Heng Feng Acked-by: Hans de Goede Signed-off-by: Benjamin Tissoires --- drivers/hid/i2c-hid/i2c-hid-core.c | 118 +++---------------------------------- 1 file changed, 7 insertions(+), 111 deletions(-) diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 2a7c6e33bb1c..d9c55e30f986 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -48,8 +47,6 @@ /* quirks to control the device */ #define I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV BIT(0) #define I2C_HID_QUIRK_NO_IRQ_AFTER_RESET BIT(1) -#define I2C_HID_QUIRK_NO_RUNTIME_PM BIT(2) -#define I2C_HID_QUIRK_DELAY_AFTER_SLEEP BIT(3) #define I2C_HID_QUIRK_BOGUS_IRQ BIT(4) /* flags */ @@ -172,14 +169,7 @@ static const struct i2c_hid_quirks { { USB_VENDOR_ID_WEIDA, HID_ANY_ID, I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV }, { I2C_VENDOR_ID_HANTICK, I2C_PRODUCT_ID_HANTICK_5288, - I2C_HID_QUIRK_NO_IRQ_AFTER_RESET | - I2C_HID_QUIRK_NO_RUNTIME_PM }, - { I2C_VENDOR_ID_RAYDIUM, I2C_PRODUCT_ID_RAYDIUM_4B33, - I2C_HID_QUIRK_DELAY_AFTER_SLEEP }, - { USB_VENDOR_ID_LG, I2C_DEVICE_ID_LG_8001, - I2C_HID_QUIRK_NO_RUNTIME_PM }, - { I2C_VENDOR_ID_GOODIX, I2C_DEVICE_ID_GOODIX_01F0, - I2C_HID_QUIRK_NO_RUNTIME_PM }, + I2C_HID_QUIRK_NO_IRQ_AFTER_RESET }, { USB_VENDOR_ID_ELAN, HID_ANY_ID, I2C_HID_QUIRK_BOGUS_IRQ }, { 0, 0 } @@ -397,7 +387,6 @@ static int i2c_hid_set_power(struct i2c_client *client, int power_state) { struct i2c_hid *ihid = i2c_get_clientdata(client); int ret; - unsigned long now, delay; i2c_hid_dbg(ihid, "%s\n", __func__); @@ -415,22 +404,9 @@ static int i2c_hid_set_power(struct i2c_client *client, int power_state) goto set_pwr_exit; } - if (ihid->quirks & I2C_HID_QUIRK_DELAY_AFTER_SLEEP && - power_state == I2C_HID_PWR_ON) { - now = jiffies; - if (time_after(ihid->sleep_delay, now)) { - delay = jiffies_to_usecs(ihid->sleep_delay - now); - usleep_range(delay, delay + 1); - } - 
} - ret = __i2c_hid_command(client, &hid_set_power_cmd, power_state, 0, NULL, 0, NULL, 0); - if (ihid->quirks & I2C_HID_QUIRK_DELAY_AFTER_SLEEP && - power_state == I2C_HID_PWR_SLEEP) - ihid->sleep_delay = jiffies + msecs_to_jiffies(20); - if (ret) dev_err(&client->dev, "failed to change power setting.\n"); @@ -791,11 +767,6 @@ static int i2c_hid_open(struct hid_device *hid) { struct i2c_client *client = hid->driver_data; struct i2c_hid *ihid = i2c_get_clientdata(client); - int ret = 0; - - ret = pm_runtime_get_sync(&client->dev); - if (ret < 0) - return ret; set_bit(I2C_HID_STARTED, &ihid->flags); return 0; @@ -807,27 +778,6 @@ static void i2c_hid_close(struct hid_device *hid) struct i2c_hid *ihid = i2c_get_clientdata(client); clear_bit(I2C_HID_STARTED, &ihid->flags); - - /* Save some power */ - pm_runtime_put(&client->dev); -} - -static int i2c_hid_power(struct hid_device *hid, int lvl) -{ - struct i2c_client *client = hid->driver_data; - struct i2c_hid *ihid = i2c_get_clientdata(client); - - i2c_hid_dbg(ihid, "%s lvl:%d\n", __func__, lvl); - - switch (lvl) { - case PM_HINT_FULLON: - pm_runtime_get_sync(&client->dev); - break; - case PM_HINT_NORMAL: - pm_runtime_put(&client->dev); - break; - } - return 0; } struct hid_ll_driver i2c_hid_ll_driver = { @@ -836,7 +786,6 @@ struct hid_ll_driver i2c_hid_ll_driver = { .stop = i2c_hid_stop, .open = i2c_hid_open, .close = i2c_hid_close, - .power = i2c_hid_power, .output_report = i2c_hid_output_report, .raw_request = i2c_hid_raw_request, }; @@ -1104,9 +1053,6 @@ static int i2c_hid_probe(struct i2c_client *client, i2c_hid_acpi_fix_up_power(&client->dev); - pm_runtime_get_noresume(&client->dev); - pm_runtime_set_active(&client->dev); - pm_runtime_enable(&client->dev); device_enable_async_suspend(&client->dev); /* Make sure there is something at this address */ @@ -1114,16 +1060,16 @@ static int i2c_hid_probe(struct i2c_client *client, if (ret < 0) { dev_dbg(&client->dev, "nothing at this address: %d\n", ret); ret = -ENXIO; - 
goto err_pm; + goto err_regulator; } ret = i2c_hid_fetch_hid_descriptor(ihid); if (ret < 0) - goto err_pm; + goto err_regulator; ret = i2c_hid_init_irq(client); if (ret < 0) - goto err_pm; + goto err_regulator; hid = hid_allocate_device(); if (IS_ERR(hid)) { @@ -1154,9 +1100,6 @@ static int i2c_hid_probe(struct i2c_client *client, goto err_mem_free; } - if (!(ihid->quirks & I2C_HID_QUIRK_NO_RUNTIME_PM)) - pm_runtime_put(&client->dev); - return 0; err_mem_free: @@ -1165,10 +1108,6 @@ err_mem_free: err_irq: free_irq(client->irq, ihid); -err_pm: - pm_runtime_put_noidle(&client->dev); - pm_runtime_disable(&client->dev); - err_regulator: regulator_bulk_disable(ARRAY_SIZE(ihid->pdata.supplies), ihid->pdata.supplies); @@ -1181,12 +1120,6 @@ static int i2c_hid_remove(struct i2c_client *client) struct i2c_hid *ihid = i2c_get_clientdata(client); struct hid_device *hid; - if (!(ihid->quirks & I2C_HID_QUIRK_NO_RUNTIME_PM)) - pm_runtime_get_sync(&client->dev); - pm_runtime_disable(&client->dev); - pm_runtime_set_suspended(&client->dev); - pm_runtime_put_noidle(&client->dev); - hid = ihid->hid; hid_destroy_device(hid); @@ -1219,25 +1152,15 @@ static int i2c_hid_suspend(struct device *dev) int wake_status; if (hid->driver && hid->driver->suspend) { - /* - * Wake up the device so that IO issues in - * HID driver's suspend code can succeed. 
- */ - ret = pm_runtime_resume(dev); - if (ret < 0) - return ret; - ret = hid->driver->suspend(hid, PMSG_SUSPEND); if (ret < 0) return ret; } - if (!pm_runtime_suspended(dev)) { - /* Save some power */ - i2c_hid_set_power(client, I2C_HID_PWR_SLEEP); + /* Save some power */ + i2c_hid_set_power(client, I2C_HID_PWR_SLEEP); - disable_irq(client->irq); - } + disable_irq(client->irq); if (device_may_wakeup(&client->dev)) { wake_status = enable_irq_wake(client->irq); @@ -1279,11 +1202,6 @@ static int i2c_hid_resume(struct device *dev) wake_status); } - /* We'll resume to full power */ - pm_runtime_disable(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - enable_irq(client->irq); /* Instead of resetting device, simply powers the device on. This @@ -1304,30 +1222,8 @@ static int i2c_hid_resume(struct device *dev) } #endif -#ifdef CONFIG_PM -static int i2c_hid_runtime_suspend(struct device *dev) -{ - struct i2c_client *client = to_i2c_client(dev); - - i2c_hid_set_power(client, I2C_HID_PWR_SLEEP); - disable_irq(client->irq); - return 0; -} - -static int i2c_hid_runtime_resume(struct device *dev) -{ - struct i2c_client *client = to_i2c_client(dev); - - enable_irq(client->irq); - i2c_hid_set_power(client, I2C_HID_PWR_ON); - return 0; -} -#endif - static const struct dev_pm_ops i2c_hid_pm = { SET_SYSTEM_SLEEP_PM_OPS(i2c_hid_suspend, i2c_hid_resume) - SET_RUNTIME_PM_OPS(i2c_hid_runtime_suspend, i2c_hid_runtime_resume, - NULL) }; static const struct i2c_device_id i2c_hid_id_table[] = { -- cgit v1.2.3 From abdd3d0b344fdf72a4904d09b97bc964d74c4419 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 17 Oct 2019 21:45:15 -0700 Subject: HID: logitech-hidpp: split g920_get_config() Original version of g920_get_config() contained two kind of actions: 1. 
Device specific communication to query/set some parameters which requires active communication channel with the device, or, put in other way, for the call to be sandwiched between hid_device_io_start() and hid_device_io_stop(). 2. Input subsystem specific FF controller initialization which, in order to access a valid 'struct hid_input' via 'hid->inputs.next', requires claimed hidinput which means be executed after the call to hid_hw_start() with connect_mask containing HID_CONNECT_HIDINPUT. Location of g920_get_config() can only fulfill requirements for #1 and not #2, which might result in following backtrace: [ 88.312258] logitech-hidpp-device 0003:046D:C262.0005: HID++ 4.2 device connected. [ 88.320298] BUG: kernel NULL pointer dereference, address: 0000000000000018 [ 88.320304] #PF: supervisor read access in kernel mode [ 88.320307] #PF: error_code(0x0000) - not-present page [ 88.320309] PGD 0 P4D 0 [ 88.320315] Oops: 0000 [#1] SMP PTI [ 88.320320] CPU: 1 PID: 3080 Comm: systemd-udevd Not tainted 5.4.0-rc1+ #31 [ 88.320322] Hardware name: Apple Inc. 
MacBookPro11,1/Mac-189A3D4F975D5FFC, BIOS 149.0.0.0.0 09/17/2018 [ 88.320334] RIP: 0010:hidpp_probe+0x61f/0x948 [hid_logitech_hidpp] [ 88.320338] Code: 81 00 00 48 89 ef e8 f0 d6 ff ff 41 89 c6 85 c0 75 b5 0f b6 44 24 28 48 8b 5d 00 88 44 24 1e 89 44 24 0c 48 8b 83 18 1c 00 00 <48> 8b 48 18 48 8b 83 10 19 00 00 48 8b 40 40 48 89 0c 24 0f b7 80 [ 88.320341] RSP: 0018:ffffb0a6824aba68 EFLAGS: 00010246 [ 88.320345] RAX: 0000000000000000 RBX: ffff93a50756e000 RCX: 0000000000010408 [ 88.320347] RDX: 0000000000000000 RSI: ffff93a51f0ad0a0 RDI: 000000000002d0a0 [ 88.320350] RBP: ffff93a50416da28 R08: ffff93a50416da70 R09: ffff93a50416da70 [ 88.320352] R10: 000000148ae9e60c R11: 00000000000f1525 R12: ffff93a50756e000 [ 88.320354] R13: ffff93a50756f8d0 R14: 0000000000000000 R15: ffff93a50756fc38 [ 88.320358] FS: 00007f8d8c1e0940(0000) GS:ffff93a51f080000(0000) knlGS:0000000000000000 [ 88.320361] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 88.320363] CR2: 0000000000000018 CR3: 00000003996d8003 CR4: 00000000001606e0 [ 88.320366] Call Trace: [ 88.320377] ? _cond_resched+0x15/0x30 [ 88.320387] ? create_pinctrl+0x2f/0x3c0 [ 88.320393] ? kernfs_link_sibling+0x94/0xe0 [ 88.320398] ? _cond_resched+0x15/0x30 [ 88.320402] ? kernfs_activate+0x5f/0x80 [ 88.320406] ? kernfs_add_one+0xe2/0x130 [ 88.320411] hid_device_probe+0x106/0x170 [ 88.320419] really_probe+0x147/0x3c0 [ 88.320424] driver_probe_device+0xb6/0x100 [ 88.320428] device_driver_attach+0x53/0x60 [ 88.320433] __driver_attach+0x8a/0x150 [ 88.320437] ? device_driver_attach+0x60/0x60 [ 88.320440] bus_for_each_dev+0x78/0xc0 [ 88.320445] bus_add_driver+0x14d/0x1f0 [ 88.320450] driver_register+0x6c/0xc0 [ 88.320453] ? 0xffffffffc0d67000 [ 88.320457] __hid_register_driver+0x4c/0x80 [ 88.320464] do_one_initcall+0x46/0x1f4 [ 88.320469] ? _cond_resched+0x15/0x30 [ 88.320474] ? kmem_cache_alloc_trace+0x162/0x220 [ 88.320481] ? 
do_init_module+0x23/0x230 [ 88.320486] do_init_module+0x5c/0x230 [ 88.320491] load_module+0x26e1/0x2990 [ 88.320502] ? ima_post_read_file+0xf0/0x100 [ 88.320508] ? __do_sys_finit_module+0xaa/0x110 [ 88.320512] __do_sys_finit_module+0xaa/0x110 [ 88.320520] do_syscall_64+0x5b/0x180 [ 88.320525] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 88.320528] RIP: 0033:0x7f8d8d1f01fd [ 88.320532] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 5b 8c 0c 00 f7 d8 64 89 01 48 [ 88.320535] RSP: 002b:00007ffefa3bb068 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 88.320539] RAX: ffffffffffffffda RBX: 000055922040cb40 RCX: 00007f8d8d1f01fd [ 88.320541] RDX: 0000000000000000 RSI: 00007f8d8ce4984d RDI: 0000000000000006 [ 88.320543] RBP: 0000000000020000 R08: 0000000000000000 R09: 0000000000000007 [ 88.320545] R10: 0000000000000006 R11: 0000000000000246 R12: 00007f8d8ce4984d [ 88.320547] R13: 0000000000000000 R14: 000055922040efc0 R15: 000055922040cb40 [ 88.320551] Modules linked in: hid_logitech_hidpp(+) fuse rfcomm ccm xt_CHECKSUM xt_MASQUERADE bridge stp llc nf_nat_tftp nf_conntrack_tftp nf_conntrack_netbios_ns nf_conntrack_broadcast xt_CT ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ebtable_nat ip6table_nat ip6table_mangle ip6table_raw ip6table_security iptable_nat nf_nat tun iptable_mangle iptable_raw iptable_security nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c ip_set nfnetlink ebtable_filter ebtables ip6table_filter ip6_tables cmac bnep sunrpc dm_crypt nls_utf8 hfsplus intel_rapl_msr intel_rapl_common ath9k_htc ath9k_common x86_pkg_temp_thermal intel_powerclamp b43 ath9k_hw coretemp snd_hda_codec_hdmi cordic kvm_intel snd_hda_codec_cirrus mac80211 snd_hda_codec_generic ledtrig_audio kvm snd_hda_intel snd_intel_nhlt irqbypass snd_hda_codec btusb btrtl snd_hda_core ath btbcm ssb snd_hwdep btintel snd_seq crct10dif_pclmul iTCO_wdt snd_seq_device 
crc32_pclmul bluetooth mmc_core iTCO_vendor_support joydev cfg80211 [ 88.320602] applesmc ghash_clmulni_intel ecdh_generic snd_pcm input_polldev intel_cstate ecc intel_uncore thunderbolt snd_timer i2c_i801 libarc4 rfkill intel_rapl_perf lpc_ich mei_me pcspkr bcm5974 snd bcma mei soundcore acpi_als sbs kfifo_buf sbshc industrialio apple_bl i915 i2c_algo_bit drm_kms_helper drm uas crc32c_intel usb_storage video hid_apple [ 88.320630] CR2: 0000000000000018 [ 88.320633] ---[ end trace 933491c8a4fadeb7 ]--- [ 88.320642] RIP: 0010:hidpp_probe+0x61f/0x948 [hid_logitech_hidpp] [ 88.320645] Code: 81 00 00 48 89 ef e8 f0 d6 ff ff 41 89 c6 85 c0 75 b5 0f b6 44 24 28 48 8b 5d 00 88 44 24 1e 89 44 24 0c 48 8b 83 18 1c 00 00 <48> 8b 48 18 48 8b 83 10 19 00 00 48 8b 40 40 48 89 0c 24 0f b7 80 [ 88.320647] RSP: 0018:ffffb0a6824aba68 EFLAGS: 00010246 [ 88.320650] RAX: 0000000000000000 RBX: ffff93a50756e000 RCX: 0000000000010408 [ 88.320652] RDX: 0000000000000000 RSI: ffff93a51f0ad0a0 RDI: 000000000002d0a0 [ 88.320655] RBP: ffff93a50416da28 R08: ffff93a50416da70 R09: ffff93a50416da70 [ 88.320657] R10: 000000148ae9e60c R11: 00000000000f1525 R12: ffff93a50756e000 [ 88.320659] R13: ffff93a50756f8d0 R14: 0000000000000000 R15: ffff93a50756fc38 [ 88.320662] FS: 00007f8d8c1e0940(0000) GS:ffff93a51f080000(0000) knlGS:0000000000000000 [ 88.320664] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 88.320667] CR2: 0000000000000018 CR3: 00000003996d8003 CR4: 00000000001606e0 To solve this issue: 1. Split g920_get_config() such that all of the device specific communication remains a part of the function and input subsystem initialization bits go to hidpp_ff_init() 2. Move call to hidpp_ff_init() from being a part of g920_get_config() to be the last step of .probe(), right after a call to hid_hw_start() with connect_mask containing HID_CONNECT_HIDINPUT. 
Fixes: 91cf9a98ae41 ("HID: logitech-hidpp: make .probe usbhid capable") Signed-off-by: Andrey Smirnov Tested-by: Sam Bazley Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: Henrik Rydberg Cc: Pierre-Loup A. Griffais Cc: Austin Palmer Cc: linux-input@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 5.2+ Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-logitech-hidpp.c | 150 +++++++++++++++++++++++++-------------- 1 file changed, 96 insertions(+), 54 deletions(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 1ac1ecc1e67c..85911586b3b6 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -1669,6 +1669,7 @@ static void hidpp_touchpad_raw_xy_event(struct hidpp_device *hidpp_dev, #define HIDPP_FF_EFFECTID_NONE -1 #define HIDPP_FF_EFFECTID_AUTOCENTER -2 +#define HIDPP_AUTOCENTER_PARAMS_LENGTH 18 #define HIDPP_FF_MAX_PARAMS 20 #define HIDPP_FF_RESERVED_SLOTS 1 @@ -2009,7 +2010,7 @@ static int hidpp_ff_erase_effect(struct input_dev *dev, int effect_id) static void hidpp_ff_set_autocenter(struct input_dev *dev, u16 magnitude) { struct hidpp_ff_private_data *data = dev->ff->private; - u8 params[18]; + u8 params[HIDPP_AUTOCENTER_PARAMS_LENGTH]; dbg_hid("Setting autocenter to %d.\n", magnitude); @@ -2081,7 +2082,8 @@ static void hidpp_ff_destroy(struct ff_device *ff) kfree(data->effect_ids); } -static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) +static int hidpp_ff_init(struct hidpp_device *hidpp, + struct hidpp_ff_private_data *data) { struct hid_device *hid = hidpp->hid_dev; struct hid_input *hidinput; @@ -2089,9 +2091,7 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) const struct usb_device_descriptor *udesc = &(hid_to_usb_dev(hid)->descriptor); const u16 bcdDevice = le16_to_cpu(udesc->bcdDevice); struct ff_device *ff; - struct hidpp_report response; - struct hidpp_ff_private_data *data; - int error, j, num_slots; + int error, 
j, num_slots = data->num_effects; u8 version; if (list_empty(&hid->inputs)) { @@ -2116,27 +2116,17 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) for (j = 0; hidpp_ff_effects_v2[j] >= 0; j++) set_bit(hidpp_ff_effects_v2[j], dev->ffbit); - /* Read number of slots available in device */ - error = hidpp_send_fap_command_sync(hidpp, feature_index, - HIDPP_FF_GET_INFO, NULL, 0, &response); - if (error) { - if (error < 0) - return error; - hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n", - __func__, error); - return -EPROTO; - } - - num_slots = response.fap.params[0] - HIDPP_FF_RESERVED_SLOTS; - error = input_ff_create(dev, num_slots); if (error) { hid_err(dev, "Failed to create FF device!\n"); return error; } - - data = kzalloc(sizeof(*data), GFP_KERNEL); + /* + * Create a copy of passed data, so we can transfer memory + * ownership to FF core + */ + data = kmemdup(data, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; data->effect_ids = kcalloc(num_slots, sizeof(int), GFP_KERNEL); @@ -2152,10 +2142,7 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) } data->hidpp = hidpp; - data->feature_index = feature_index; data->version = version; - data->slot_autocenter = 0; - data->num_effects = num_slots; for (j = 0; j < num_slots; j++) data->effect_ids[j] = -1; @@ -2169,37 +2156,14 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) ff->set_autocenter = hidpp_ff_set_autocenter; ff->destroy = hidpp_ff_destroy; - - /* reset all forces */ - error = hidpp_send_fap_command_sync(hidpp, feature_index, - HIDPP_FF_RESET_ALL, NULL, 0, &response); - - /* Read current Range */ - error = hidpp_send_fap_command_sync(hidpp, feature_index, - HIDPP_FF_GET_APERTURE, NULL, 0, &response); - if (error) - hid_warn(hidpp->hid_dev, "Failed to read range from device!\n"); - data->range = error ? 
900 : get_unaligned_be16(&response.fap.params[0]); - /* Create sysfs interface */ error = device_create_file(&(hidpp->hid_dev->dev), &dev_attr_range); if (error) hid_warn(hidpp->hid_dev, "Unable to create sysfs interface for \"range\", errno %d!\n", error); - /* Read the current gain values */ - error = hidpp_send_fap_command_sync(hidpp, feature_index, - HIDPP_FF_GET_GLOBAL_GAINS, NULL, 0, &response); - if (error) - hid_warn(hidpp->hid_dev, "Failed to read gain values from device!\n"); - data->gain = error ? 0xffff : get_unaligned_be16(&response.fap.params[0]); - /* ignore boost value at response.fap.params[2] */ - /* init the hardware command queue */ atomic_set(&data->workqueue_size, 0); - /* initialize with zero autocenter to get wheel in usable state */ - hidpp_ff_set_autocenter(dev, 0); - hid_info(hid, "Force feedback support loaded (firmware release %d).\n", version); @@ -2732,24 +2696,93 @@ static int k400_connect(struct hid_device *hdev, bool connected) #define HIDPP_PAGE_G920_FORCE_FEEDBACK 0x8123 -static int g920_get_config(struct hidpp_device *hidpp) +static int g920_ff_set_autocenter(struct hidpp_device *hidpp, + struct hidpp_ff_private_data *data) { + struct hidpp_report response; + u8 params[HIDPP_AUTOCENTER_PARAMS_LENGTH] = { + [1] = HIDPP_FF_EFFECT_SPRING | HIDPP_FF_EFFECT_AUTOSTART, + }; + int ret; + + /* initialize with zero autocenter to get wheel in usable state */ + + dbg_hid("Setting autocenter to 0.\n"); + ret = hidpp_send_fap_command_sync(hidpp, data->feature_index, + HIDPP_FF_DOWNLOAD_EFFECT, + params, ARRAY_SIZE(params), + &response); + if (ret) + hid_warn(hidpp->hid_dev, "Failed to autocenter device!\n"); + else + data->slot_autocenter = response.fap.params[0]; + + return ret; +} + +static int g920_get_config(struct hidpp_device *hidpp, + struct hidpp_ff_private_data *data) +{ + struct hidpp_report response; u8 feature_type; - u8 feature_index; int ret; + memset(data, 0, sizeof(*data)); + /* Find feature and store for later use */ ret = 
hidpp_root_get_feature(hidpp, HIDPP_PAGE_G920_FORCE_FEEDBACK, - &feature_index, &feature_type); + &data->feature_index, &feature_type); if (ret) return ret; - ret = hidpp_ff_init(hidpp, feature_index); + /* Read number of slots available in device */ + ret = hidpp_send_fap_command_sync(hidpp, data->feature_index, + HIDPP_FF_GET_INFO, + NULL, 0, + &response); + if (ret) { + if (ret < 0) + return ret; + hid_err(hidpp->hid_dev, + "%s: received protocol error 0x%02x\n", __func__, ret); + return -EPROTO; + } + + data->num_effects = response.fap.params[0] - HIDPP_FF_RESERVED_SLOTS; + + /* reset all forces */ + ret = hidpp_send_fap_command_sync(hidpp, data->feature_index, + HIDPP_FF_RESET_ALL, + NULL, 0, + &response); if (ret) - hid_warn(hidpp->hid_dev, "Unable to initialize force feedback support, errno %d\n", - ret); + hid_warn(hidpp->hid_dev, "Failed to reset all forces!\n"); - return 0; + ret = hidpp_send_fap_command_sync(hidpp, data->feature_index, + HIDPP_FF_GET_APERTURE, + NULL, 0, + &response); + if (ret) { + hid_warn(hidpp->hid_dev, + "Failed to read range from device!\n"); + } + data->range = ret ? + 900 : get_unaligned_be16(&response.fap.params[0]); + + /* Read the current gain values */ + ret = hidpp_send_fap_command_sync(hidpp, data->feature_index, + HIDPP_FF_GET_GLOBAL_GAINS, + NULL, 0, + &response); + if (ret) + hid_warn(hidpp->hid_dev, + "Failed to read gain values from device!\n"); + data->gain = ret ? 
+ 0xffff : get_unaligned_be16(&response.fap.params[0]); + + /* ignore boost value at response.fap.params[2] */ + + return g920_ff_set_autocenter(hidpp, data); } /* -------------------------------------------------------------------------- */ @@ -3512,6 +3545,7 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id) int ret; bool connected; unsigned int connect_mask = HID_CONNECT_DEFAULT; + struct hidpp_ff_private_data data; /* report_fixup needs drvdata to be set before we call hid_parse */ hidpp = devm_kzalloc(&hdev->dev, sizeof(*hidpp), GFP_KERNEL); @@ -3621,7 +3655,7 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id) if (ret) goto hid_hw_init_fail; } else if (connected && (hidpp->quirks & HIDPP_QUIRK_CLASS_G920)) { - ret = g920_get_config(hidpp); + ret = g920_get_config(hidpp, &data); if (ret) goto hid_hw_init_fail; } @@ -3643,6 +3677,14 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id) goto hid_hw_start_fail; } + if (hidpp->quirks & HIDPP_QUIRK_CLASS_G920) { + ret = hidpp_ff_init(hidpp, &data); + if (ret) + hid_warn(hidpp->hid_dev, + "Unable to initialize force feedback support, errno %d\n", + ret); + } + return ret; hid_hw_init_fail: -- cgit v1.2.3 From 905d754c53a522aacf806ea1d3e7c929148c1910 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 17 Oct 2019 21:45:16 -0700 Subject: HID: logitech-hidpp: rework device validation G920 device only advertises REPORT_ID_HIDPP_LONG and REPORT_ID_HIDPP_VERY_LONG in its HID report descriptor, so querying for REPORT_ID_HIDPP_SHORT with optional=false will always fail and prevent G920 to be recognized as a valid HID++ device. 
To fix this and improve some other aspects, modify hidpp_validate_device() as follows: - Inline the code of hidpp_validate_report() to simplify distinguishing between non-present and invalid report descriptors - Drop the check for id >= HID_MAX_IDS || id < 0 since all of our IDs are static and known to satisfy that at compile time - Change the algorithms to check all possible report types (including very long report) and deem the device as a valid HID++ device if it supports at least one - Treat invalid report length as a hard stop for the validation algorithm, meaning that if any of the supported reports has invalid length we assume the worst and treat the device as a generic HID device. - Fold initialization of hidpp->very_long_report_length into hidpp_validate_device() since it already fetches very long report length and validates its value Fixes: fe3ee1ec007b ("HID: logitech-hidpp: allow non HID++ devices to be handled by this module") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204191 Reported-by: Sam Bazely Signed-off-by: Andrey Smirnov Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: Henrik Rydberg Cc: Pierre-Loup A. 
Griffais Cc: Austin Palmer Cc: linux-input@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 5.2+ Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-logitech-hidpp.c | 54 ++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 85911586b3b6..6e669eb7dc69 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -3498,34 +3498,45 @@ static int hidpp_get_report_length(struct hid_device *hdev, int id) return report->field[0]->report_count + 1; } -static bool hidpp_validate_report(struct hid_device *hdev, int id, - int expected_length, bool optional) +static bool hidpp_validate_device(struct hid_device *hdev) { - int report_length; + struct hidpp_device *hidpp = hid_get_drvdata(hdev); + int id, report_length, supported_reports = 0; + + id = REPORT_ID_HIDPP_SHORT; + report_length = hidpp_get_report_length(hdev, id); + if (report_length) { + if (report_length < HIDPP_REPORT_SHORT_LENGTH) + goto bad_device; - if (id >= HID_MAX_IDS || id < 0) { - hid_err(hdev, "invalid HID report id %u\n", id); - return false; + supported_reports++; } + id = REPORT_ID_HIDPP_LONG; report_length = hidpp_get_report_length(hdev, id); - if (!report_length) - return optional; + if (report_length) { + if (report_length < HIDPP_REPORT_LONG_LENGTH) + goto bad_device; - if (report_length < expected_length) { - hid_warn(hdev, "not enough values in hidpp report %d\n", id); - return false; + supported_reports++; } - return true; -} + id = REPORT_ID_HIDPP_VERY_LONG; + report_length = hidpp_get_report_length(hdev, id); + if (report_length) { + if (report_length < HIDPP_REPORT_LONG_LENGTH || + report_length > HIDPP_REPORT_VERY_LONG_MAX_LENGTH) + goto bad_device; -static bool hidpp_validate_device(struct hid_device *hdev) -{ - return hidpp_validate_report(hdev, REPORT_ID_HIDPP_SHORT, - HIDPP_REPORT_SHORT_LENGTH, false) && - 
hidpp_validate_report(hdev, REPORT_ID_HIDPP_LONG, - HIDPP_REPORT_LONG_LENGTH, true); + supported_reports++; + hidpp->very_long_report_length = report_length; + } + + return supported_reports; + +bad_device: + hid_warn(hdev, "not enough values in hidpp report %d\n", id); + return false; } static bool hidpp_application_equals(struct hid_device *hdev, @@ -3572,11 +3583,6 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id) return hid_hw_start(hdev, HID_CONNECT_DEFAULT); } - hidpp->very_long_report_length = - hidpp_get_report_length(hdev, REPORT_ID_HIDPP_VERY_LONG); - if (hidpp->very_long_report_length > HIDPP_REPORT_VERY_LONG_MAX_LENGTH) - hidpp->very_long_report_length = HIDPP_REPORT_VERY_LONG_MAX_LENGTH; - if (id->group == HID_GROUP_LOGITECH_DJ_DEVICE) hidpp->quirks |= HIDPP_QUIRK_UNIFYING; -- cgit v1.2.3 From 08c453f6d073f069cf8e30e03cd3c16262c9b953 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 17 Oct 2019 21:45:17 -0700 Subject: HID: logitech-hidpp: do all FF cleanup in hidpp_ff_destroy() All of the FF-related resources belong to corresponding FF device, so they should be freed as a part of hidpp_ff_destroy() to avoid potential race conditions. Fixes: ff21a635dd1a ("HID: logitech-hidpp: Force feedback support for the Logitech G920") Suggested-by: Benjamin Tissoires Signed-off-by: Andrey Smirnov Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: Henrik Rydberg Cc: Pierre-Loup A. 
Griffais Cc: Austin Palmer Cc: linux-input@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 5.2+ Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-logitech-hidpp.c | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 6e669eb7dc69..8e91e2f06cb4 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -2078,7 +2078,12 @@ static DEVICE_ATTR(range, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH, hidpp static void hidpp_ff_destroy(struct ff_device *ff) { struct hidpp_ff_private_data *data = ff->private; + struct hid_device *hid = data->hidpp->hid_dev; + hid_info(hid, "Unloading HID++ force feedback.\n"); + + device_remove_file(&hid->dev, &dev_attr_range); + destroy_workqueue(data->wq); kfree(data->effect_ids); } @@ -2170,31 +2175,6 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, return 0; } -static int hidpp_ff_deinit(struct hid_device *hid) -{ - struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct input_dev *dev = hidinput->input; - struct hidpp_ff_private_data *data; - - if (!dev) { - hid_err(hid, "Struct input_dev not found!\n"); - return -EINVAL; - } - - hid_info(hid, "Unloading HID++ force feedback.\n"); - data = dev->ff->private; - if (!data) { - hid_err(hid, "Private data not found!\n"); - return -EINVAL; - } - - destroy_workqueue(data->wq); - device_remove_file(&hid->dev, &dev_attr_range); - - return 0; -} - - /* ************************************************************************** */ /* */ /* Device Support */ @@ -3713,9 +3693,6 @@ static void hidpp_remove(struct hid_device *hdev) sysfs_remove_group(&hdev->dev.kobj, &ps_attribute_group); - if (hidpp->quirks & HIDPP_QUIRK_CLASS_G920) - hidpp_ff_deinit(hdev); - hid_hw_stop(hdev); cancel_work_sync(&hidpp->work); mutex_destroy(&hidpp->send_mutex); -- cgit v1.2.3 From 
09f3dbe474735df13dd8a66d3d1231048d9b373f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 7 Oct 2019 20:56:26 +0200 Subject: HID: i2c-hid: add Trekstor Primebook C11B to descriptor override The Primebook C11B uses the SIPODEV SP1064 touchpad. There are 2 versions of this 2-in-1 and the touchpad in the older version does not supply descriptors, so it has to be added to the override list. Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Benjamin Tissoires --- drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c index 75078c83be1a..d31ea82b84c1 100644 --- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c +++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c @@ -322,6 +322,25 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = { }, .driver_data = (void *)&sipodev_desc }, + { + /* + * There are at least 2 Primebook C11B versions, the older + * version has a product-name of "Primebook C11B", and a + * bios version / release / firmware revision of: + * V2.1.2 / 05/03/2018 / 18.2 + * The new version has "PRIMEBOOK C11B" as product-name and a + * bios version / release / firmware revision of: + * CFALKSW05_BIOS_V1.1.2 / 11/19/2018 / 19.2 + * Only the older version needs this quirk, note the newer + * version will not match as it has a different product-name. 
+ */ + .ident = "Trekstor Primebook C11B", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TREKSTOR"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Primebook C11B"), + }, + .driver_data = (void *)&sipodev_desc + }, { .ident = "Direkt-Tek DTLAPY116-2", .matches = { -- cgit v1.2.3 From 612e0486ad0845c41ac10492e78144f99e326375 Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Thu, 3 Oct 2019 16:13:53 +0530 Subject: iw_cxgb4: fix ECN check on the passive accept pass_accept_req() is using the same skb for handling accept request and sending accept reply to HW. Here req and rpl structures are pointing to same skb->data which is over written by INIT_TP_WR() and leads to accessing corrupt req fields in accept_cr() while checking for ECN flags. Reordered code in accept_cr() to fetch correct req fields. Fixes: 92e7ae7172 ("iw_cxgb4: Choose appropriate hw mtu index and ISS for iWARP connections") Signed-off-by: Potnuri Bharat Teja Link: https://lore.kernel.org/r/20191003104353.11590-1-bharat@chelsio.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index e87fc0408470..9e8eca7b613c 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2424,20 +2424,6 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; pr_debug("ep %p tid %u\n", ep, ep->hwtid); - - skb_get(skb); - rpl = cplhdr(skb); - if (!is_t4(adapter_type)) { - skb_trim(skb, roundup(sizeof(*rpl5), 16)); - rpl5 = (void *)rpl; - INIT_TP_WR(rpl5, ep->hwtid); - } else { - skb_trim(skb, sizeof(*rpl)); - INIT_TP_WR(rpl, ep->hwtid); - } - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, - ep->hwtid)); - cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, enable_tcp_timestamps && req->tcpopt.tstamp, 
(ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); @@ -2483,6 +2469,20 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, if (tcph->ece && tcph->cwr) opt2 |= CCTRL_ECN_V(1); } + + skb_get(skb); + rpl = cplhdr(skb); + if (!is_t4(adapter_type)) { + skb_trim(skb, roundup(sizeof(*rpl5), 16)); + rpl5 = (void *)rpl; + INIT_TP_WR(rpl5, ep->hwtid); + } else { + skb_trim(skb, sizeof(*rpl)); + INIT_TP_WR(rpl, ep->hwtid); + } + OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, + ep->hwtid)); + if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) { u32 isn = (prandom_u32() & ~7UL) - 1; opt2 |= T5_OPT_2_VALID_F; -- cgit v1.2.3 From 54102dd410b037a4d7984e6a5826fb212c2f8aca Mon Sep 17 00:00:00 2001 From: Krishnamraju Eraparaju Date: Mon, 7 Oct 2019 15:56:27 +0530 Subject: RDMA/iwcm: move iw_rem_ref() calls out of spinlock kref release routines usually perform memory release operations, hence, they should not be called with spinlocks held. one such case is: SIW kref release routine siw_free_qp(), which can sleep via vfree() while freeing queue memory. Hence, all iw_rem_ref() calls in IWCM are moved out of spinlocks. 
Fixes: 922a8e9fb2e0 ("RDMA: iWARP Connection Manager.") Signed-off-by: Krishnamraju Eraparaju Reviewed-by: Bernard Metzler Link: https://lore.kernel.org/r/20191007102627.12568-1-krishna2@chelsio.com Signed-off-by: Doug Ledford --- drivers/infiniband/core/iwcm.c | 52 +++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 72141c5b7c95..ade71823370f 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -372,6 +372,7 @@ EXPORT_SYMBOL(iw_cm_disconnect); static void destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; + struct ib_qp *qp; unsigned long flags; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); @@ -389,6 +390,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); + qp = cm_id_priv->qp; + cm_id_priv->qp = NULL; + switch (cm_id_priv->state) { case IW_CM_STATE_LISTEN: cm_id_priv->state = IW_CM_STATE_DESTROYING; @@ -401,7 +405,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); /* Abrupt close of the connection */ - (void)iwcm_modify_qp_err(cm_id_priv->qp); + (void)iwcm_modify_qp_err(qp); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_IDLE: @@ -426,11 +430,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) BUG(); break; } - if (cm_id_priv->qp) { - cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); - cm_id_priv->qp = NULL; - } spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id_priv->id.device->ops.iw_rem_ref(qp); if (cm_id->mapped) { iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); @@ -671,11 +673,11 @@ int iw_cm_accept(struct iw_cm_id *cm_id, BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); cm_id_priv->state = IW_CM_STATE_IDLE; 
spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id_priv->qp) { - cm_id->device->ops.iw_rem_ref(qp); - cm_id_priv->qp = NULL; - } + qp = cm_id_priv->qp; + cm_id_priv->qp = NULL; spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id->device->ops.iw_rem_ref(qp); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); } @@ -696,7 +698,7 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) struct iwcm_id_private *cm_id_priv; int ret; unsigned long flags; - struct ib_qp *qp; + struct ib_qp *qp = NULL; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); @@ -730,13 +732,13 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) return 0; /* success */ spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id_priv->qp) { - cm_id->device->ops.iw_rem_ref(qp); - cm_id_priv->qp = NULL; - } + qp = cm_id_priv->qp; + cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; err: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id->device->ops.iw_rem_ref(qp); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return ret; @@ -878,6 +880,7 @@ static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { + struct ib_qp *qp = NULL; unsigned long flags; int ret; @@ -896,11 +899,13 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, cm_id_priv->state = IW_CM_STATE_ESTABLISHED; } else { /* REJECTED or RESET */ - cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); + qp = cm_id_priv->qp; cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id_priv->id.device->ops.iw_rem_ref(qp); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); if (iw_event->private_data_len) @@ -942,21 +947,18 @@ static void 
cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, static int cm_close_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { + struct ib_qp *qp; unsigned long flags; - int ret = 0; + int ret = 0, notify_event = 0; spin_lock_irqsave(&cm_id_priv->lock, flags); + qp = cm_id_priv->qp; + cm_id_priv->qp = NULL; - if (cm_id_priv->qp) { - cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); - cm_id_priv->qp = NULL; - } switch (cm_id_priv->state) { case IW_CM_STATE_ESTABLISHED: case IW_CM_STATE_CLOSING: cm_id_priv->state = IW_CM_STATE_IDLE; - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); - spin_lock_irqsave(&cm_id_priv->lock, flags); + notify_event = 1; break; case IW_CM_STATE_DESTROYING: break; @@ -965,6 +967,10 @@ static int cm_close_handler(struct iwcm_id_private *cm_id_priv, } spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id_priv->id.device->ops.iw_rem_ref(qp); + if (notify_event) + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); return ret; } -- cgit v1.2.3 From e17fa5c95ef2434a08e0be217969d246d037f0c2 Mon Sep 17 00:00:00 2001 From: Krishnamraju Eraparaju Date: Mon, 7 Oct 2019 16:12:29 +0530 Subject: RDMA/siw: free siw_base_qp in kref release routine As siw_free_qp() is the last routine to access 'siw_base_qp' structure, freeing this structure early in siw_destroy_qp() could cause touch-after-free issue. Hence, moved kfree(siw_base_qp) from siw_destroy_qp() to siw_free_qp(). 
Fixes: 303ae1cdfdf7 ("rdma/siw: application interface") Signed-off-by: Krishnamraju Eraparaju Link: https://lore.kernel.org/r/20191007104229.29412-1-krishna2@chelsio.com Signed-off-by: Doug Ledford --- drivers/infiniband/sw/siw/siw_qp.c | 2 ++ drivers/infiniband/sw/siw/siw_verbs.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 52d402f39df9..b4317480cee7 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -1312,6 +1312,7 @@ int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp) void siw_free_qp(struct kref *ref) { struct siw_qp *found, *qp = container_of(ref, struct siw_qp, ref); + struct siw_base_qp *siw_base_qp = to_siw_base_qp(qp->ib_qp); struct siw_device *sdev = qp->sdev; unsigned long flags; @@ -1334,4 +1335,5 @@ void siw_free_qp(struct kref *ref) atomic_dec(&sdev->num_qp); siw_dbg_qp(qp, "free QP\n"); kfree_rcu(qp, rcu); + kfree(siw_base_qp); } diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 869e02b69a01..b18a677832e1 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -604,7 +604,6 @@ out: int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) { struct siw_qp *qp = to_siw_qp(base_qp); - struct siw_base_qp *siw_base_qp = to_siw_base_qp(base_qp); struct siw_ucontext *uctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); @@ -641,7 +640,6 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) qp->scq = qp->rcq = NULL; siw_qp_put(qp); - kfree(siw_base_qp); return 0; } -- cgit v1.2.3 From b806c94ee44e53233b8ce6c92d9078d9781786a5 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 8 Oct 2019 00:07:30 +0300 Subject: RDMA/qedr: Fix reported firmware version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove spaces from the reported 
firmware version string. Actual value: $ cat /sys/class/infiniband/qedr0/fw_ver 8. 37. 7. 0 Expected value: $ cat /sys/class/infiniband/qedr0/fw_ver 8.37.7.0 Fixes: ec72fce401c6 ("qedr: Add support for RoCE HW init") Signed-off-by: Kamal Heib Acked-by: Michal Kalderon Link: https://lore.kernel.org/r/20191007210730.7173-1-kamalheib1@gmail.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 5136b835e1ba..dc71b6e16a07 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -76,7 +76,7 @@ static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str) struct qedr_dev *qedr = get_qedr_dev(ibdev); u32 fw_ver = (u32)qedr->attr.fw_ver; - snprintf(str, IB_FW_VERSION_NAME_MAX, "%d. %d. %d. %d", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d", (fw_ver >> 24) & 0xFF, (fw_ver >> 16) & 0xFF, (fw_ver >> 8) & 0xFF, fw_ver & 0xFF); } -- cgit v1.2.3 From 777a8b32bc0f9bb25848a025f72a9febc30d9033 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 2 Oct 2019 15:17:50 +0300 Subject: IB/core: Use rdma_read_gid_l2_fields to compare GID L2 fields Current code tries to derive VLAN ID and compares it with GID attribute for matching entry. This raw search fails on macvlan netdevice as it is not a VLAN device, but is an upper device of a VLAN netdevice. Due to this limitation, incoming QP1 packets fail to match in the GID table. Such packets are dropped. Hence, to support it, use the existing rdma_read_gid_l2_fields() that takes care of different device types. 
Fixes: dbf727de7440 ("IB/core: Use GID table in AH creation and dmac resolution") Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20191002121750.17313-1-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index f974b6854224..35c2841a569e 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -662,16 +662,17 @@ static bool find_gid_index(const union ib_gid *gid, void *context) { struct find_gid_index_context *ctx = context; + u16 vlan_id = 0xffff; + int ret; if (ctx->gid_type != gid_attr->gid_type) return false; - if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) || - (is_vlan_dev(gid_attr->ndev) && - vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id)) + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); + if (ret) return false; - return true; + return ctx->vlan_id == vlan_id; } static const struct ib_gid_attr * -- cgit v1.2.3 From 0c258dec8d98af15b34dbffdb89c008b6da01ff8 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 16 Sep 2019 17:43:33 +0300 Subject: net/mlx5e: Tx, Fix assumption of single WQEBB of NOP in cleanup flow Cited patch removed the assumption only in datapath. Here we remove it also from the control/cleanup flow. 
Fixes: 9ab0233728ca ("net/mlx5e: Tx, Don't implicitly assume SKB-less wqe has one WQEBB") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +++++- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 7569287f8f3c..b476b007f093 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1349,9 +1349,13 @@ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) /* last doorbell out, godspeed .. */ if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) { u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + struct mlx5e_tx_wqe_info *wi; struct mlx5e_tx_wqe *nop; - sq->db.wqe_info[pi].skb = NULL; + wi = &sq->db.wqe_info[pi]; + + memset(wi, 0, sizeof(*wi)); + wi->num_wqebbs = 1; nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc); mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index d3a67a9b4eba..9094e9519db7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -550,8 +550,8 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) wi = &sq->db.wqe_info[ci]; skb = wi->skb; - if (!skb) { /* nop */ - sq->cc++; + if (!skb) { + sq->cc += wi->num_wqebbs; continue; } -- cgit v1.2.3 From 500f36a485862cee15752b58a5a9a50c1f59ff58 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 16 Sep 2019 17:19:12 +0300 Subject: net/mlx5e: Tx, Zero-memset WQE info struct upon update Not all fields of WQE info are being written in the function, having some leftovers from previous rounds. Zero-memset it upon update. 
Particularly, not nullifying the wi->resync_dump_frag field will cause double free of the kTLS DUMPed frags. Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 87be96747902..182d5c5664eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -92,7 +92,7 @@ mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq, /* fill sq frag edge with nops to avoid wqe wrapping two pages */ for (; wi < edge_wi; wi++) { - wi->skb = NULL; + memset(wi, 0, sizeof(*wi)); wi->num_wqebbs = 1; mlx5e_post_nop(wq, sq->sqn, &sq->pc); } -- cgit v1.2.3 From 2c559361389b452ca23494080d0c65ab812706c1 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 18 Sep 2019 13:45:38 +0300 Subject: net/mlx5e: kTLS, Release reference on DUMPed fragments in shutdown flow A call to kTLS completion handler was missing in the TXQSQ release flow. Add it. 
Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_accel/ktls.h | 7 +++++- .../ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 11 +++++++-- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 28 ++++++++++++---------- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index b7298f9ee3d3..c4c128908b6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -86,7 +86,7 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_tx_wqe **wqe, u16 *pi); void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, - struct mlx5e_sq_dma *dma); + u32 *dma_fifo_cc); #else @@ -94,6 +94,11 @@ static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) { } +static inline void +mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc) {} + #endif #endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index d195366461c9..90c6ce530a18 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -303,9 +303,16 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, - struct mlx5e_sq_dma *dma) + u32 *dma_fifo_cc) { - struct mlx5e_sq_stats *stats = sq->stats; + struct mlx5e_sq_stats *stats; + struct mlx5e_sq_dma *dma; + + if (!wi->resync_dump_frag) + return; + + dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++); + stats = 
sq->stats; mlx5e_tx_dma_unmap(sq->pdev, dma); __skb_frag_unref(wi->resync_dump_frag); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 9094e9519db7..8dd8f0be101b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -479,14 +479,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) skb = wi->skb; if (unlikely(!skb)) { -#ifdef CONFIG_MLX5_EN_TLS - if (wi->resync_dump_frag) { - struct mlx5e_sq_dma *dma = - mlx5e_dma_get(sq, dma_fifo_cc++); - - mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma); - } -#endif + mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc); sqcc += wi->num_wqebbs; continue; } @@ -542,29 +535,38 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) { struct mlx5e_tx_wqe_info *wi; struct sk_buff *skb; + u32 dma_fifo_cc; + u16 sqcc; u16 ci; int i; - while (sq->cc != sq->pc) { - ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc); + sqcc = sq->cc; + dma_fifo_cc = sq->dma_fifo_cc; + + while (sqcc != sq->pc) { + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); wi = &sq->db.wqe_info[ci]; skb = wi->skb; if (!skb) { - sq->cc += wi->num_wqebbs; + mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc); + sqcc += wi->num_wqebbs; continue; } for (i = 0; i < wi->num_dma; i++) { struct mlx5e_sq_dma *dma = - mlx5e_dma_get(sq, sq->dma_fifo_cc++); + mlx5e_dma_get(sq, dma_fifo_cc++); mlx5e_tx_dma_unmap(sq->pdev, dma); } dev_kfree_skb_any(skb); - sq->cc += wi->num_wqebbs; + sqcc += wi->num_wqebbs; } + + sq->dma_fifo_cc = dma_fifo_cc; + sq->cc = sqcc; } #ifdef CONFIG_MLX5_CORE_IPOIB -- cgit v1.2.3 From 9b1fef2f23c1141c9936debe633ff16e44c6137b Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 1 Sep 2019 13:53:26 +0300 Subject: net/mlx5e: kTLS, Size of a Dump WQE is fixed No Eth segment, so no dynamic inline headers. The size of a Dump WQE is fixed, use constants and remove unnecessary checks. 
Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h | 9 ++++++++- .../net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 17 +++-------------- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 182d5c5664eb..25f9dda578ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -23,7 +23,7 @@ #define MLX5E_SQ_TLS_ROOM \ (MLX5_SEND_WQE_MAX_WQEBBS + \ MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + \ - MAX_SKB_FRAGS * MLX5E_KTLS_MAX_DUMP_WQEBBS) + MAX_SKB_FRAGS * MLX5E_KTLS_DUMP_WQEBBS) #endif #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index c4c128908b6e..eb692feba4a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -21,7 +21,14 @@ MLX5_ST_SZ_BYTES(tls_progress_params)) #define MLX5E_KTLS_PROGRESS_WQEBBS \ (DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_BB)) -#define MLX5E_KTLS_MAX_DUMP_WQEBBS 2 + +struct mlx5e_dump_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_data_seg data; +}; + +#define MLX5E_KTLS_DUMP_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5e_dump_wqe), MLX5_SEND_WQE_BB)) enum { MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 90c6ce530a18..ac54767b7d86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -250,11 +250,6 @@ tx_post_resync_params(struct mlx5e_txqsq *sq, mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, skip_static_post, true); } -struct mlx5e_dump_wqe { - struct mlx5_wqe_ctrl_seg ctrl; - struct mlx5_wqe_data_seg data; -}; - static int tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool first) { @@ -262,7 +257,6 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir struct mlx5_wqe_data_seg *dseg; struct mlx5e_dump_wqe *wqe; dma_addr_t dma_addr = 0; - u8 num_wqebbs; u16 ds_cnt; int fsz; u16 pi; @@ -270,7 +264,6 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi); ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; - num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); cseg = &wqe->ctrl; dseg = &wqe->data; @@ -291,12 +284,8 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir dseg->byte_count = cpu_to_be32(fsz); mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); - tx_fill_wi(sq, pi, num_wqebbs, frag, fsz); - sq->pc += num_wqebbs; - - WARN(num_wqebbs > MLX5E_KTLS_MAX_DUMP_WQEBBS, - "unexpected DUMP num_wqebbs, %d > %d", - num_wqebbs, MLX5E_KTLS_MAX_DUMP_WQEBBS); + tx_fill_wi(sq, pi, MLX5E_KTLS_DUMP_WQEBBS, frag, fsz); + sq->pc += MLX5E_KTLS_DUMP_WQEBBS; return 0; } @@ -368,7 +357,7 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, stats->tls_ooo++; num_wqebbs = MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + - (info.nr_frags ? info.nr_frags * MLX5E_KTLS_MAX_DUMP_WQEBBS : 1); + (info.nr_frags ? 
info.nr_frags * MLX5E_KTLS_DUMP_WQEBBS : 1); pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); if (unlikely(contig_wqebbs_room < num_wqebbs)) -- cgit v1.2.3 From f45da3716fb2fb09e301a1b6edf200ff343dc06e Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 18 Sep 2019 13:50:32 +0300 Subject: net/mlx5e: kTLS, Save only the frag page to release at completion In TX resync flow where DUMP WQEs are posted, keep a pointer to the fragment page to unref it upon completion, instead of saving the whole fragment. In addition, move it to the end of the arguments list in tx_fill_wi(). Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- .../ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 27 +++++++++++----------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 8d76452cacdc..cb6f7b87e38f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -345,7 +345,7 @@ struct mlx5e_tx_wqe_info { u8 num_wqebbs; u8 num_dma; #ifdef CONFIG_MLX5_EN_TLS - skb_frag_t *resync_dump_frag; + struct page *resync_dump_frag_page; #endif }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index ac54767b7d86..6dfb22d705b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -108,16 +108,15 @@ build_progress_params(struct mlx5e_tx_wqe *wqe, u16 pc, u32 sqn, } static void tx_fill_wi(struct mlx5e_txqsq *sq, - u16 pi, u8 num_wqebbs, - skb_frag_t *resync_dump_frag, - u32 num_bytes) + u16 pi, u8 num_wqebbs, u32 num_bytes, + struct page *page) { struct mlx5e_tx_wqe_info *wi =
&sq->db.wqe_info[pi]; - wi->skb = NULL; - wi->num_wqebbs = num_wqebbs; - wi->resync_dump_frag = resync_dump_frag; - wi->num_bytes = num_bytes; + memset(wi, 0, sizeof(*wi)); + wi->num_wqebbs = num_wqebbs; + wi->num_bytes = num_bytes; + wi->resync_dump_frag_page = page; } void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx) @@ -145,7 +144,7 @@ post_static_params(struct mlx5e_txqsq *sq, umr_wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_STATIC_UMR_WQE_SZ, &pi); build_static_params(umr_wqe, sq->pc, sq->sqn, priv_tx, fence); - tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, NULL, 0); + tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, 0, NULL); sq->pc += MLX5E_KTLS_STATIC_WQEBBS; } @@ -159,7 +158,7 @@ post_progress_params(struct mlx5e_txqsq *sq, wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_PROGRESS_WQE_SZ, &pi); build_progress_params(wqe, sq->pc, sq->sqn, priv_tx, fence); - tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, NULL, 0); + tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, 0, NULL); sq->pc += MLX5E_KTLS_PROGRESS_WQEBBS; } @@ -211,7 +210,7 @@ static bool tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, while (remaining > 0) { skb_frag_t *frag = &record->frags[i]; - __skb_frag_ref(frag); + get_page(skb_frag_page(frag)); remaining -= skb_frag_size(frag); info->frags[i++] = frag; } @@ -284,7 +283,7 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir dseg->byte_count = cpu_to_be32(fsz); mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); - tx_fill_wi(sq, pi, MLX5E_KTLS_DUMP_WQEBBS, frag, fsz); + tx_fill_wi(sq, pi, MLX5E_KTLS_DUMP_WQEBBS, fsz, skb_frag_page(frag)); sq->pc += MLX5E_KTLS_DUMP_WQEBBS; return 0; @@ -297,14 +296,14 @@ void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_sq_stats *stats; struct mlx5e_sq_dma *dma; - if (!wi->resync_dump_frag) + if (!wi->resync_dump_frag_page) return; dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++); stats = sq->stats; mlx5e_tx_dma_unmap(sq->pdev, 
dma); - __skb_frag_unref(wi->resync_dump_frag); + put_page(wi->resync_dump_frag_page); stats->tls_dump_packets++; stats->tls_dump_bytes += wi->num_bytes; } @@ -314,7 +313,7 @@ static void tx_post_fence_nop(struct mlx5e_txqsq *sq) struct mlx5_wq_cyc *wq = &sq->wq; u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - tx_fill_wi(sq, pi, 1, NULL, 0); + tx_fill_wi(sq, pi, 1, 0, NULL); mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc); } -- cgit v1.2.3 From 310d9b9d37220b590909e90e724fc5f346a98775 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 18 Sep 2019 13:57:40 +0300 Subject: net/mlx5e: kTLS, Save by-value copy of the record frags Access the record fragments only under the TLS ctx lock. In the resync flow, save a copy of them to be used when preparing and posting the required DUMP WQEs. Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 6dfb22d705b2..334808b1863b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -179,7 +179,7 @@ struct tx_sync_info { u64 rcd_sn; s32 sync_len; int nr_frags; - skb_frag_t *frags[MAX_SKB_FRAGS]; + skb_frag_t frags[MAX_SKB_FRAGS]; }; static bool tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, @@ -212,11 +212,11 @@ static bool tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, get_page(skb_frag_page(frag)); remaining -= skb_frag_size(frag); - info->frags[i++] = frag; + info->frags[i++] = *frag; } /* reduce the part which will be sent with the original SKB */ if (remaining < 0) - skb_frag_size_add(info->frags[i - 1], remaining); + skb_frag_size_add(&info->frags[i - 1], 
remaining); info->nr_frags = i; out: spin_unlock_irqrestore(&tx_ctx->lock, flags); @@ -365,7 +365,7 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, tx_post_resync_params(sq, priv_tx, info.rcd_sn); for (i = 0; i < info.nr_frags; i++) - if (tx_post_resync_dump(sq, info.frags[i], priv_tx->tisn, !i)) + if (tx_post_resync_dump(sq, &info.frags[i], priv_tx->tisn, !i)) goto err_out; /* If no dump WQE was sent, we need to have a fence NOP WQE before the -- cgit v1.2.3 From b61b24bd135a7775a2839863bd1d58a462a5f1e5 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 18 Sep 2019 13:57:40 +0300 Subject: net/mlx5e: kTLS, Fix page refcnt leak in TX resync error flow All references for frag pages that are obtained in tx_sync_info_get() should be released. Release usually occurs in the corresponding CQE of the WQE. In error flows, not all fragments have a WQE posted for them, hence no matching CQE will be generated. For these pages, release the reference in the error flow. Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 334808b1863b..5f1d18fb644e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -329,7 +329,7 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, struct tx_sync_info info = {}; u16 contig_wqebbs_room, pi; u8 num_wqebbs; - int i; + int i = 0; if (!tx_sync_info_get(priv_tx, seq, &info)) { /* We might get here if a retransmission reaches the driver @@ -364,7 +364,7 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, tx_post_resync_params(sq, 
priv_tx, info.rcd_sn); - for (i = 0; i < info.nr_frags; i++) + for (; i < info.nr_frags; i++) if (tx_post_resync_dump(sq, &info.frags[i], priv_tx->tisn, !i)) goto err_out; @@ -377,6 +377,9 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, return skb; err_out: + for (; i < info.nr_frags; i++) + put_page(skb_frag_page(&info.frags[i])); + dev_kfree_skb_any(skb); return NULL; } -- cgit v1.2.3 From 700ec497424069fa4d8f3715759c4aaec016e840 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 7 Oct 2019 13:59:11 +0300 Subject: net/mlx5e: kTLS, Fix missing SQ edge fill Before posting the context params WQEs, make sure there is enough contiguous room for them, and fill frag edge if needed. When posting only a nop, no need for room check, as it needs a single WQEBB, meaning no contiguity issue. Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 28 +++++++++++++++------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 5f1d18fb644e..59e3f48470d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -168,6 +168,14 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, bool skip_static_post, bool fence_first_post) { bool progress_fence = skip_static_post || !fence_first_post; + struct mlx5_wq_cyc *wq = &sq->wq; + u16 contig_wqebbs_room, pi; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < + MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS)) + mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); if (!skip_static_post) post_static_params(sq, priv_tx, fence_first_post); @@ 
-355,10 +363,20 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, stats->tls_ooo++; - num_wqebbs = MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + - (info.nr_frags ? info.nr_frags * MLX5E_KTLS_DUMP_WQEBBS : 1); + tx_post_resync_params(sq, priv_tx, info.rcd_sn); + + /* If no dump WQE was sent, we need to have a fence NOP WQE before the + * actual data xmit. + */ + if (!info.nr_frags) { + tx_post_fence_nop(sq); + return skb; + } + + num_wqebbs = info.nr_frags * MLX5E_KTLS_DUMP_WQEBBS; pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < num_wqebbs)) mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); @@ -368,12 +386,6 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, if (tx_post_resync_dump(sq, &info.frags[i], priv_tx->tisn, !i)) goto err_out; - /* If no dump WQE was sent, we need to have a fence NOP WQE before the - * actual data xmit. - */ - if (!info.nr_frags) - tx_post_fence_nop(sq); - return skb; err_out: -- cgit v1.2.3 From 84d1bb2b139e0184b1754aa1b5776186b475fce8 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 7 Oct 2019 14:01:29 +0300 Subject: net/mlx5e: kTLS, Limit DUMP wqe size HW expects the data size in DUMP WQEs to be up to MTU. Make sure they are in range. We elevate the frag page refcount by 'n-1', in addition to the one obtained in tx_sync_info_get(), having an overall of 'n' references. We bulk the increments by using a single page_ref_add() command, to optimize performance. The refcounts are released one by one, by the corresponding completions.
Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 11 ++++--- .../ethernet/mellanox/mlx5/core/en_accel/ktls.h | 11 ++++++- .../ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 34 +++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 ++++- 5 files changed, 52 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index cb6f7b87e38f..f1a7bc46f1c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -410,6 +410,7 @@ struct mlx5e_txqsq { struct device *pdev; __be32 mkey_be; unsigned long state; + unsigned int hw_mtu; struct hwtstamp_config *tstamp; struct mlx5_clock *clock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 25f9dda578ac..7c8796d9743f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -15,15 +15,14 @@ #else /* TLS offload requires additional stop_room for: * - a resync SKB. - * kTLS offload requires additional stop_room for: - * - static params WQE, - * - progress params WQE, and - * - resync DUMP per frag. + * kTLS offload requires fixed additional stop_room for: + * - a static params WQE, and a progress params WQE. + * The additional MTU-depending room for the resync DUMP WQEs + * will be calculated and added in runtime. 
*/ #define MLX5E_SQ_TLS_ROOM \ (MLX5_SEND_WQE_MAX_WQEBBS + \ - MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + \ - MAX_SKB_FRAGS * MLX5E_KTLS_DUMP_WQEBBS) + MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS) #endif #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index eb692feba4a6..929966e6fbc4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -94,7 +94,16 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, u32 *dma_fifo_cc); - +static inline u8 +mlx5e_ktls_dumps_num_wqebbs(struct mlx5e_txqsq *sq, unsigned int nfrags, + unsigned int sync_len) +{ + /* Given the MTU and sync_len, calculates an upper bound for the + * number of WQEBBs needed for the TX resync DUMP WQEs of a record. 
+ */ + return MLX5E_KTLS_DUMP_WQEBBS * + (nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu)); +} #else static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 59e3f48470d9..e10b0bb696da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -373,7 +373,7 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, return skb; } - num_wqebbs = info.nr_frags * MLX5E_KTLS_DUMP_WQEBBS; + num_wqebbs = mlx5e_ktls_dumps_num_wqebbs(sq, info.nr_frags, info.sync_len); pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); @@ -382,14 +382,40 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, tx_post_resync_params(sq, priv_tx, info.rcd_sn); - for (; i < info.nr_frags; i++) - if (tx_post_resync_dump(sq, &info.frags[i], priv_tx->tisn, !i)) - goto err_out; + for (; i < info.nr_frags; i++) { + unsigned int orig_fsz, frag_offset = 0, n = 0; + skb_frag_t *f = &info.frags[i]; + + orig_fsz = skb_frag_size(f); + + do { + bool fence = !(i || frag_offset); + unsigned int fsz; + + n++; + fsz = min_t(unsigned int, sq->hw_mtu, orig_fsz - frag_offset); + skb_frag_size_set(f, fsz); + if (tx_post_resync_dump(sq, f, priv_tx->tisn, fence)) { + page_ref_add(skb_frag_page(f), n - 1); + goto err_out; + } + + skb_frag_off_add(f, fsz); + frag_offset += fsz; + } while (frag_offset < orig_fsz); + + page_ref_add(skb_frag_page(f), n - 1); + } return skb; err_out: for (; i < info.nr_frags; i++) + /* The put_page() here undoes the page ref obtained in tx_sync_info_get(). + * Page refs obtained for the DUMP WQEs above (by page_ref_add) will be + * released only upon their completions (or in mlx5e_free_txqsq_descs, + * if channel closes). 
+ */ put_page(skb_frag_page(&info.frags[i])); dev_kfree_skb_any(skb); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b476b007f093..772bfdbdeb9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1128,6 +1128,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->txq_ix = txq_ix; sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; + sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; sq->stop_room = MLX5E_SQ_STOP_ROOM; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); @@ -1135,10 +1136,14 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); +#ifdef CONFIG_MLX5_EN_TLS if (mlx5_accel_is_tls_device(c->priv->mdev)) { set_bit(MLX5E_SQ_STATE_TLS, &sq->state); - sq->stop_room += MLX5E_SQ_TLS_ROOM; + sq->stop_room += MLX5E_SQ_TLS_ROOM + + mlx5e_ktls_dumps_num_wqebbs(sq, MAX_SKB_FRAGS, + TLS_MAX_PAYLOAD_SIZE); } +#endif param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl); -- cgit v1.2.3 From ecdc65a3ec5d45725355479d63c23a20f4582104 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 6 Oct 2019 18:25:17 +0300 Subject: net/mlx5e: kTLS, Remove unneeded cipher type checks Cipher type is checked upon connection addition. No need to recheck it per every TX resync invocation.
Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index e10b0bb696da..1bfeb558ff78 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -31,9 +31,6 @@ fill_static_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx) char *salt, *rec_seq; u8 tls_version; - if (WARN_ON(crypto_info->cipher_type != TLS_CIPHER_AES_GCM_128)) - return; - info = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; EXTRACT_INFO_FIELDS; @@ -243,9 +240,6 @@ tx_post_resync_params(struct mlx5e_txqsq *sq, u16 rec_seq_sz; char *rec_seq; - if (WARN_ON(crypto_info->cipher_type != TLS_CIPHER_AES_GCM_128)) - return; - info = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; rec_seq = info->rec_seq; rec_seq_sz = sizeof(info->rec_seq); -- cgit v1.2.3 From af11a7a42454b17c77da5fa55b6b6325b11d60e5 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 22 Sep 2019 14:05:24 +0300 Subject: net/mlx5e: kTLS, Save a copy of the crypto info Do not assume the crypto info is accessible during the connection lifetime. Save a copy of it in the private TX context. 
Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 8 ++------ 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index d2ff74d52720..46725cd743a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -38,7 +38,7 @@ static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk, return -ENOMEM; tx_priv->expected_seq = start_offload_tcp_sn; - tx_priv->crypto_info = crypto_info; + tx_priv->crypto_info = *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info; mlx5e_set_ktls_tx_priv_ctx(tls_ctx, tx_priv); /* tc and underlay_qpn values are not in use for tls tis */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index 929966e6fbc4..a3efa29a4629 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -44,7 +44,7 @@ enum { struct mlx5e_ktls_offload_context_tx { struct tls_offload_context_tx *tx_ctx; - struct tls_crypto_info *crypto_info; + struct tls12_crypto_info_aes_gcm_128 crypto_info; u32 expected_seq; u32 tisn; u32 key_id; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 1bfeb558ff78..badc6fd26a14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -24,14 +24,12 @@ enum { static void fill_static_params_ctx(void *ctx, struct 
mlx5e_ktls_offload_context_tx *priv_tx) { - struct tls_crypto_info *crypto_info = priv_tx->crypto_info; - struct tls12_crypto_info_aes_gcm_128 *info; + struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info; char *initial_rn, *gcm_iv; u16 salt_sz, rec_seq_sz; char *salt, *rec_seq; u8 tls_version; - info = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; EXTRACT_INFO_FIELDS; gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv); @@ -233,14 +231,12 @@ tx_post_resync_params(struct mlx5e_txqsq *sq, struct mlx5e_ktls_offload_context_tx *priv_tx, u64 rcd_sn) { - struct tls_crypto_info *crypto_info = priv_tx->crypto_info; - struct tls12_crypto_info_aes_gcm_128 *info; + struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info; __be64 rn_be = cpu_to_be64(rcd_sn); bool skip_static_post; u16 rec_seq_sz; char *rec_seq; - info = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; rec_seq = info->rec_seq; rec_seq_sz = sizeof(info->rec_seq); -- cgit v1.2.3 From 46a3ea98074e2a7731ab9b84ec60fc18a2f909e5 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 3 Oct 2019 10:48:10 +0300 Subject: net/mlx5e: kTLS, Enhance TX resync flow Once the kTLS TX resync function is called, it used to return a binary value, for success or failure. However, in case the TLS SKB is a retransmission of the connection handshake, it initiates the resync flow (as the tcp seq check holds), while regular packet handle is expected. In this patch, we identify this case and skip the resync operation accordingly. Counters: - Add a counter (tls_skip_no_sync_data) to monitor this. - Bump the dump counters up as they are used more frequently. - Add a missing counter descriptor declaration for tls_resync_bytes in sq_stats_desc. 
Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 58 +++++++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 16 +++--- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 10 ++-- 3 files changed, 51 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index badc6fd26a14..778dab1af8fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -185,26 +185,33 @@ struct tx_sync_info { skb_frag_t frags[MAX_SKB_FRAGS]; }; -static bool tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, - u32 tcp_seq, struct tx_sync_info *info) +enum mlx5e_ktls_sync_retval { + MLX5E_KTLS_SYNC_DONE, + MLX5E_KTLS_SYNC_FAIL, + MLX5E_KTLS_SYNC_SKIP_NO_DATA, +}; + +static enum mlx5e_ktls_sync_retval +tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, + u32 tcp_seq, struct tx_sync_info *info) { struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx; + enum mlx5e_ktls_sync_retval ret = MLX5E_KTLS_SYNC_DONE; struct tls_record_info *record; int remaining, i = 0; unsigned long flags; - bool ret = true; spin_lock_irqsave(&tx_ctx->lock, flags); record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn); if (unlikely(!record)) { - ret = false; + ret = MLX5E_KTLS_SYNC_FAIL; goto out; } if (unlikely(tcp_seq < tls_record_start_seq(record))) { - if (!tls_record_is_start_marker(record)) - ret = false; + ret = tls_record_is_start_marker(record) ? 
+ MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL; goto out; } @@ -316,20 +323,26 @@ static void tx_post_fence_nop(struct mlx5e_txqsq *sq) mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc); } -static struct sk_buff * +static enum mlx5e_ktls_sync_retval mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, struct mlx5e_txqsq *sq, - struct sk_buff *skb, + int datalen, u32 seq) { struct mlx5e_sq_stats *stats = sq->stats; struct mlx5_wq_cyc *wq = &sq->wq; + enum mlx5e_ktls_sync_retval ret; struct tx_sync_info info = {}; u16 contig_wqebbs_room, pi; u8 num_wqebbs; int i = 0; - if (!tx_sync_info_get(priv_tx, seq, &info)) { + ret = tx_sync_info_get(priv_tx, seq, &info); + if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) { + if (ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA) { + stats->tls_skip_no_sync_data++; + return MLX5E_KTLS_SYNC_SKIP_NO_DATA; + } /* We might get here if a retransmission reaches the driver * after the relevant record is acked. * It should be safe to drop the packet in this case @@ -339,13 +352,8 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, } if (unlikely(info.sync_len < 0)) { - u32 payload; - int headln; - - headln = skb_transport_offset(skb) + tcp_hdrlen(skb); - payload = skb->len - headln; - if (likely(payload <= -info.sync_len)) - return skb; + if (likely(datalen <= -info.sync_len)) + return MLX5E_KTLS_SYNC_DONE; stats->tls_drop_bypass_req++; goto err_out; @@ -360,7 +368,7 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, */ if (!info.nr_frags) { tx_post_fence_nop(sq); - return skb; + return MLX5E_KTLS_SYNC_DONE; } num_wqebbs = mlx5e_ktls_dumps_num_wqebbs(sq, info.nr_frags, info.sync_len); @@ -397,7 +405,7 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, page_ref_add(skb_frag_page(f), n - 1); } - return skb; + return MLX5E_KTLS_SYNC_DONE; err_out: for (; i < info.nr_frags; i++) @@ -408,8 +416,7 @@ err_out: */ put_page(skb_frag_page(&info.frags[i])); - 
dev_kfree_skb_any(skb); - return NULL; + return MLX5E_KTLS_SYNC_FAIL; } struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, @@ -445,10 +452,15 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, seq = ntohl(tcp_hdr(skb)->seq); if (unlikely(priv_tx->expected_seq != seq)) { - skb = mlx5e_ktls_tx_handle_ooo(priv_tx, sq, skb, seq); - if (unlikely(!skb)) + enum mlx5e_ktls_sync_retval ret = + mlx5e_ktls_tx_handle_ooo(priv_tx, sq, datalen, seq); + + if (likely(ret == MLX5E_KTLS_SYNC_DONE)) + *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); + else if (ret == MLX5E_KTLS_SYNC_FAIL) + goto err_out; + else /* ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA */ goto out; - *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); } priv_tx->expected_seq = seq + datalen; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index ac6fdcda7019..7e6ebd0505cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -52,11 +52,12 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_skip_no_sync_data) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_no_sync_data) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_bypass_req) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) }, #endif { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, @@ -288,11 +289,12 @@ static void 
mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes; s->tx_tls_ctx += sq_stats->tls_ctx; s->tx_tls_ooo += sq_stats->tls_ooo; + s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes; + s->tx_tls_dump_packets += sq_stats->tls_dump_packets; s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes; + s->tx_tls_skip_no_sync_data += sq_stats->tls_skip_no_sync_data; s->tx_tls_drop_no_sync_data += sq_stats->tls_drop_no_sync_data; s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req; - s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes; - s->tx_tls_dump_packets += sq_stats->tls_dump_packets; #endif s->tx_cqes += sq_stats->cqes; } @@ -1472,10 +1474,12 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) }, #endif { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 79f261bf86ac..869f3502f631 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -129,11 +129,12 @@ struct mlx5e_sw_stats { u64 tx_tls_encrypted_bytes; u64 
tx_tls_ctx; u64 tx_tls_ooo; + u64 tx_tls_dump_packets; + u64 tx_tls_dump_bytes; u64 tx_tls_resync_bytes; + u64 tx_tls_skip_no_sync_data; u64 tx_tls_drop_no_sync_data; u64 tx_tls_drop_bypass_req; - u64 tx_tls_dump_packets; - u64 tx_tls_dump_bytes; #endif u64 rx_xsk_packets; @@ -273,11 +274,12 @@ struct mlx5e_sq_stats { u64 tls_encrypted_bytes; u64 tls_ctx; u64 tls_ooo; + u64 tls_dump_packets; + u64 tls_dump_bytes; u64 tls_resync_bytes; + u64 tls_skip_no_sync_data; u64 tls_drop_no_sync_data; u64 tls_drop_bypass_req; - u64 tls_dump_packets; - u64 tls_dump_bytes; #endif /* less likely accessed in data path */ u64 csum_none; -- cgit v1.2.3 From 61ea02d2c13106116c6e4916ac5d9dd41151c959 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 24 Sep 2019 11:29:09 +0300 Subject: net/mlx5e: TX, Fix consumer index of error cqe dump The completion queue consumer index increments upon a call to mlx5_cqwq_pop(). When dumping an error CQE, the index is already incremented. Decrease one for the print command. 
Fixes: 16cc14d81733 ("net/mlx5e: Dump xmit error completions") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 8dd8f0be101b..67dc4f0921b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -403,7 +403,10 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq, struct mlx5_err_cqe *err_cqe) { - u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq); + struct mlx5_cqwq *wq = &sq->cq.wq; + u32 ci; + + ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1); netdev_err(sq->channel->netdev, "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", -- cgit v1.2.3 From c8c2a057fdc7de1cd16f4baa51425b932a42eb39 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Tue, 24 Sep 2019 22:20:34 -0500 Subject: net/mlx5: prevent memory leak in mlx5_fpga_conn_create_cq In mlx5_fpga_conn_create_cq if mlx5_vector2eqn fails the allocated memory should be released. 
Fixes: 537a50574175 ("net/mlx5: FPGA, Add high-speed connection routines") Signed-off-by: Navid Emamdoost Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index 4c50efe4e7f1..61021133029e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -464,8 +464,10 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) } err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn); - if (err) + if (err) { + kvfree(in); goto err_cqwq; + } cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size)); -- cgit v1.2.3 From c7ed6d0183d5ea9bc31bcaeeba4070bd62546471 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Fri, 27 Sep 2019 17:37:28 -0500 Subject: net/mlx5: fix memory leak in mlx5_fw_fatal_reporter_dump In mlx5_fw_fatal_reporter_dump if mlx5_crdump_collect fails the allocated memory for cr_data must be released otherwise there will be memory leak. To fix this, this commit changes the return instruction into goto error handling. 
Fixes: 9b1f29823605 ("net/mlx5: Add support for FW fatal reporter dump") Signed-off-by: Navid Emamdoost Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index d685122d9ff7..c07f3154437c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -572,7 +572,7 @@ mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter, return -ENOMEM; err = mlx5_crdump_collect(dev, cr_data); if (err) - return err; + goto free_data; if (priv_ctx) { struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; -- cgit v1.2.3 From 11875ba7f251c52effb2b924e04c2ddefa9856ef Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 18 Oct 2019 14:00:42 +0200 Subject: selftests/bpf: More compatible nc options in test_tc_edt Out of the three nc implementations widely in use, at least two (BSD netcat and nmap-ncat) do not support -l combined with -s. Modify the nc invocation to be accepted by all of them. Fixes: 7df5e3db8f63 ("selftests: bpf: tc-bpf flow shaping with EDT") Signed-off-by: Jiri Benc Signed-off-by: Daniel Borkmann Acked-by: Peter Oskolkov Link: https://lore.kernel.org/bpf/f5bf07dccd8b552a76c84d49e80b86c5aa071122.1571400024.git.jbenc@redhat.com --- tools/testing/selftests/bpf/test_tc_edt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh index f38567ef694b..daa7d1b8d309 100755 --- a/tools/testing/selftests/bpf/test_tc_edt.sh +++ b/tools/testing/selftests/bpf/test_tc_edt.sh @@ -59,7 +59,7 @@ ip netns exec ${NS_SRC} tc filter add dev veth_src egress \ # start the listener ip netns exec ${NS_DST} bash -c \ - "nc -4 -l -s ${IP_DST} -p 9000 >/dev/null &" + "nc -4 -l -p 9000 >/dev/null &" declare -i NC_PID=$! 
sleep 1 -- cgit v1.2.3 From 66cf50e65b183c863825f5c28a818e3f47a72e40 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 18 Oct 2019 16:04:58 +0200 Subject: scsi: qla2xxx: fixup incorrect usage of host_byte DRIVER_ERROR is a driver byte setting, not a host byte. The qla2xxx driver should rather return DID_ERROR here to be in line with the other drivers. Link: https://lore.kernel.org/r/20191018140458.108278-1-hare@suse.de Signed-off-by: Hannes Reinecke Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_bsg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 28d587a89ba6..99f0a1a08143 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -253,7 +253,7 @@ qla2x00_process_els(struct bsg_job *bsg_job) srb_t *sp; const char *type; int req_sg_cnt, rsp_sg_cnt; - int rval = (DRIVER_ERROR << 16); + int rval = (DID_ERROR << 16); uint16_t nextlid = 0; if (bsg_request->msgcode == FC_BSG_RPT_ELS) { @@ -432,7 +432,7 @@ qla2x00_process_ct(struct bsg_job *bsg_job) struct Scsi_Host *host = fc_bsg_to_shost(bsg_job); scsi_qla_host_t *vha = shost_priv(host); struct qla_hw_data *ha = vha->hw; - int rval = (DRIVER_ERROR << 16); + int rval = (DID_ERROR << 16); int req_sg_cnt, rsp_sg_cnt; uint16_t loop_id; struct fc_port *fcport; @@ -1950,7 +1950,7 @@ qlafx00_mgmt_cmd(struct bsg_job *bsg_job) struct Scsi_Host *host = fc_bsg_to_shost(bsg_job); scsi_qla_host_t *vha = shost_priv(host); struct qla_hw_data *ha = vha->hw; - int rval = (DRIVER_ERROR << 16); + int rval = (DID_ERROR << 16); struct qla_mt_iocb_rqst_fx00 *piocb_rqst; srb_t *sp; int req_sg_cnt = 0, rsp_sg_cnt = 0; -- cgit v1.2.3 From 535fb49e730a6fe1e9f11af4ae67ef4228ff4287 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Fri, 18 Oct 2019 18:21:11 +0200 Subject: scsi: lpfc: Check queue pointer before use The queue pointer might not be valid.
The rest of the code checks the pointer before accessing it. lpfc_sli4_process_missed_mbox_completions is the only place where the check is missing. Fixes: 657add4e5e15 ("scsi: lpfc: Fix poor use of hardware queues if fewer irq vectors") Cc: James Smart Link: https://lore.kernel.org/r/20191018162111.8798-1-dwagner@suse.de Signed-off-by: Daniel Wagner Reviewed-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index a0c6945b8139..614f78dddafe 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -7866,7 +7866,7 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba) if (sli4_hba->hdwq) { for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++) { eq = phba->sli4_hba.hba_eq_hdl[eqidx].eq; - if (eq->queue_id == sli4_hba->mbx_cq->assoc_qid) { + if (eq && eq->queue_id == sli4_hba->mbx_cq->assoc_qid) { fpeq = eq; break; } -- cgit v1.2.3 From 74e5e468b664d3739b2872d54764af97ac38e795 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Thu, 10 Oct 2019 11:31:07 +0300 Subject: scsi: ufs-bsg: Wake the device before sending raw upiu commands The scsi async probe process is calling blk_pm_runtime_init for each lun, and then those request queues are monitored by the block layer pm engine (blk-pm.c). This is however, not the case for scsi-passthrough queues, created by bsg_setup_queue(). So the ufs-bsg driver might send various commands, disregarding the pm status of the device. This is wrong, regardless if its request queue is pm-aware or not. Fixes: df032bf27a41 (scsi: ufs: Add a bsg endpoint that supports UPIUs) Link: https://lore.kernel.org/r/1570696267-8487-1-git-send-email-avri.altman@wdc.com Reported-by: Yuliy Izrailov Signed-off-by: Avri Altman Signed-off-by: Martin K. 
Petersen --- drivers/scsi/ufs/ufs_bsg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/ufs/ufs_bsg.c b/drivers/scsi/ufs/ufs_bsg.c index a9344eb4e047..dc2f6d2b46ed 100644 --- a/drivers/scsi/ufs/ufs_bsg.c +++ b/drivers/scsi/ufs/ufs_bsg.c @@ -98,6 +98,8 @@ static int ufs_bsg_request(struct bsg_job *job) bsg_reply->reply_payload_rcv_len = 0; + pm_runtime_get_sync(hba->dev); + msgcode = bsg_request->msgcode; switch (msgcode) { case UPIU_TRANSACTION_QUERY_REQ: @@ -135,6 +137,8 @@ static int ufs_bsg_request(struct bsg_job *job) break; } + pm_runtime_put_sync(hba->dev); + if (!desc_buff) goto out; -- cgit v1.2.3 From 05679ca6feebc1ef3bf743563315d9975adcf6fb Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Thu, 17 Oct 2019 12:57:02 +0200 Subject: xdp: Prevent overflow in devmap_hash cost calculation for 32-bit builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tetsuo pointed out that without an explicit cast, the cost calculation for devmap_hash type maps could overflow on 32-bit builds. This adds the missing cast. 
Fixes: 6f9d451ab1a3 ("xdp: Add devmap_hash map type for looking up devices by hashed index") Reported-by: Tetsuo Handa Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20191017105702.2807093-1-toke@redhat.com --- kernel/bpf/devmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index d27f3b60ff6d..c0a48f336997 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -128,7 +128,7 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) if (!dtab->n_buckets) /* Overflow check */ return -EINVAL; - cost += sizeof(struct hlist_head) * dtab->n_buckets; + cost += (u64) sizeof(struct hlist_head) * dtab->n_buckets; } /* if map size is larger than memlock limit, reject it */ -- cgit v1.2.3 From 83c774f0c69d9d1b32812f3fcf7dde9b556d2670 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 24 Sep 2019 21:01:15 +0300 Subject: interconnect: qcom: Fix icc_onecell_data allocation This is a struct with a trailing zero-length array of icc_node pointers but it's allocated as if it were a single array of icc_nodes instead. This allocates too much memory at probe time but shouldn't have any noticeable effect. Both sdm845 and qcs404 are affected. Fix by replacing kcalloc with kzalloc and using the "struct_size" macro. 
Signed-off-by: Leonard Crestez Fixes: 5e4e6c4d3ae0 ("interconnect: qcom: Add QCS404 interconnect provider driver") Link: https://lore.kernel.org/linux-pm/a7360abb6561917e30bbfaa6084578449152bf1d.1569348056.git.leonard.crestez@nxp.com/ Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/qcs404.c | 3 ++- drivers/interconnect/qcom/sdm845.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/interconnect/qcom/qcs404.c b/drivers/interconnect/qcom/qcs404.c index 910081d6ddc0..b4966d8f3348 100644 --- a/drivers/interconnect/qcom/qcs404.c +++ b/drivers/interconnect/qcom/qcs404.c @@ -433,7 +433,8 @@ static int qnoc_probe(struct platform_device *pdev) if (!qp) return -ENOMEM; - data = devm_kcalloc(dev, num_nodes, sizeof(*node), GFP_KERNEL); + data = devm_kzalloc(dev, struct_size(data, nodes, num_nodes), + GFP_KERNEL); if (!data) return -ENOMEM; diff --git a/drivers/interconnect/qcom/sdm845.c b/drivers/interconnect/qcom/sdm845.c index 57955596bb59..502a6c22b41e 100644 --- a/drivers/interconnect/qcom/sdm845.c +++ b/drivers/interconnect/qcom/sdm845.c @@ -790,7 +790,8 @@ static int qnoc_probe(struct platform_device *pdev) if (!qp) return -ENOMEM; - data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL); + data = devm_kzalloc(&pdev->dev, struct_size(data, nodes, num_nodes), + GFP_KERNEL); if (!data) return -ENOMEM; -- cgit v1.2.3 From a8dfe193a60c6db7c54e03e3f1b96e0aa7244990 Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Fri, 18 Oct 2019 17:17:50 +0300 Subject: interconnect: Add locking in icc_set_tag() We must ensure that the tag is not changed while we aggregate the requests. Currently the icc_set_tag() is not using any locks and this may cause the values to be aggregated incorrectly. Fix this by acquiring the icc_lock while we set the tag. 
Link: https://lore.kernel.org/lkml/20191018141750.17032-1-georgi.djakov@linaro.org/ Fixes: 127ab2cc5f19 ("interconnect: Add support for path tags") Reviewed-by: Bjorn Andersson Signed-off-by: Georgi Djakov --- drivers/interconnect/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 7b971228df38..c498796adc07 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -405,8 +405,12 @@ void icc_set_tag(struct icc_path *path, u32 tag) if (!path) return; + mutex_lock(&icc_lock); + for (i = 0; i < path->num_nodes; i++) path->reqs[i].tag = tag; + + mutex_unlock(&icc_lock); } EXPORT_SYMBOL_GPL(icc_set_tag); -- cgit v1.2.3 From 6fc28b7e0aac8a72217b314dfb5208321d9448e2 Mon Sep 17 00:00:00 2001 From: "amy.shih" Date: Mon, 14 Oct 2019 16:24:51 +0800 Subject: hwmon: (nct7904) Fix the incorrect value of vsen_mask & tcpu_mask & temp_mode in nct7904_data struct. Voltage sensors overlap with external temperature sensors. Detect the multi-function of voltage, thermal diode, thermistor and reserved from register VT_ADC_MD_REG to set value of vsen_mask & tcpu_mask & temp_mode in nct7904_data struct. If the value is reserved, needs to disable the vsen_mask & tcpu_mask. 
Signed-off-by: amy.shih Link: https://lore.kernel.org/r/20191014082451.2895-1-Amy.Shih@advantech.com.tw Signed-off-by: Guenter Roeck --- drivers/hwmon/nct7904.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/nct7904.c b/drivers/hwmon/nct7904.c index b26419dbe840..281c81edabc6 100644 --- a/drivers/hwmon/nct7904.c +++ b/drivers/hwmon/nct7904.c @@ -82,6 +82,10 @@ #define FANCTL1_FMR_REG 0x00 /* Bank 3; 1 reg per channel */ #define FANCTL1_OUT_REG 0x10 /* Bank 3; 1 reg per channel */ +#define VOLT_MONITOR_MODE 0x0 +#define THERMAL_DIODE_MODE 0x1 +#define THERMISTOR_MODE 0x3 + #define ENABLE_TSI BIT(1) static const unsigned short normal_i2c[] = { @@ -935,11 +939,16 @@ static int nct7904_probe(struct i2c_client *client, for (i = 0; i < 4; i++) { val = (ret >> (i * 2)) & 0x03; bit = (1 << i); - if (val == 0) { + if (val == VOLT_MONITOR_MODE) { data->tcpu_mask &= ~bit; + } else if (val == THERMAL_DIODE_MODE && i < 2) { + data->temp_mode |= bit; + data->vsen_mask &= ~(0x06 << (i * 2)); + } else if (val == THERMISTOR_MODE) { + data->vsen_mask &= ~(0x02 << (i * 2)); } else { - if (val == 0x1 || val == 0x2) - data->temp_mode |= bit; + /* Reserved */ + data->tcpu_mask &= ~bit; data->vsen_mask &= ~(0x06 << (i * 2)); } } -- cgit v1.2.3 From cf9249626f72878b6d205a4965093cba5cce98df Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Mon, 16 Sep 2019 14:23:43 -0500 Subject: soundwire: intel: fix intel_register_dai PDI offsets and numbers There are two issues, likely copy/paste: 1. Use cdns->pcm.num_in instead of stream_num_in for consistency with the rest of the code. This was not detected earlier since platforms did not have input-only PDIs. 2. use the correct offset for bi-dir PDM, based on IN and OUT PDIs. Again this was not detected since PDM was not supported earlier. 
Reported-by: Ranjani Sridharan Signed-off-by: Bard Liao Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190916192348.467-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index f1e38a293967..13c54eac0cc3 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -900,7 +900,7 @@ static int intel_register_dai(struct sdw_intel *sdw) /* Create PCM DAIs */ stream = &cdns->pcm; - ret = intel_create_dai(cdns, dais, INTEL_PDI_IN, stream->num_in, + ret = intel_create_dai(cdns, dais, INTEL_PDI_IN, cdns->pcm.num_in, off, stream->num_ch_in, true); if (ret) return ret; @@ -931,7 +931,7 @@ static int intel_register_dai(struct sdw_intel *sdw) if (ret) return ret; - off += cdns->pdm.num_bd; + off += cdns->pdm.num_out; ret = intel_create_dai(cdns, dais, INTEL_PDI_BD, cdns->pdm.num_bd, off, stream->num_ch_bd, false); if (ret) -- cgit v1.2.3 From 2b319d1f6f92a4ced9897678113d176ee16ae85d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 Oct 2019 09:11:40 +0200 Subject: fuse: don't dereference req->args on finished request Move the check for async request after check for the request being already finished and done with. 
Reported-by: syzbot+ae0bb7aae3de6b4594e2@syzkaller.appspotmail.com Fixes: d49937749fef ("fuse: stop copying args to fuse_req") Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index dadd617d826c..ed1abc9e33cf 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -276,10 +276,12 @@ static void flush_bg_queue(struct fuse_conn *fc) void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req) { struct fuse_iqueue *fiq = &fc->iq; - bool async = req->args->end; + bool async; if (test_and_set_bit(FR_FINISHED, &req->flags)) goto put_request; + + async = req->args->end; /* * test_and_set_bit() implies smp_mb() between bit * changing and below intr_entry check. Pairs with -- cgit v1.2.3 From 29c2c6aa32405dfee4a29911a51ba133edcedb0f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 14 Oct 2019 12:51:04 +0300 Subject: pinctrl: intel: Avoid potential glitches if pin is in GPIO mode When consumer requests a pin, in order to be on the safest side, we switch it first to GPIO mode followed by immediate transition to the input state. Due to posted writes it's likely to be a single I/O transaction. However, if firmware or boot loader already configures the pin to the GPIO mode, user expects no glitches for the requested pin. We may check if the pin is pre-configured and leave it as is till the actual consumer toggles its state to avoid glitches.
Fixes: 7981c0015af2 ("pinctrl: intel: Add Intel Sunrisepoint pin controller and GPIO support") Depends-on: f5a26acf0162 ("pinctrl: intel: Initialize GPIO properly when used through irqchip") Cc: stable@vger.kernel.org Cc: fei.yang@intel.com Reported-by: Oliver Barta Reported-by: Malin Jonsson Signed-off-by: Andy Shevchenko Signed-off-by: Mika Westerberg --- drivers/pinctrl/intel/pinctrl-intel.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index bc013599a9a3..83981ad66a71 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -52,6 +52,7 @@ #define PADCFG0_GPIROUTNMI BIT(17) #define PADCFG0_PMODE_SHIFT 10 #define PADCFG0_PMODE_MASK GENMASK(13, 10) +#define PADCFG0_PMODE_GPIO 0 #define PADCFG0_GPIORXDIS BIT(9) #define PADCFG0_GPIOTXDIS BIT(8) #define PADCFG0_GPIORXSTATE BIT(1) @@ -332,7 +333,7 @@ static void intel_pin_dbg_show(struct pinctrl_dev *pctldev, struct seq_file *s, cfg1 = readl(intel_get_padcfg(pctrl, pin, PADCFG1)); mode = (cfg0 & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; - if (!mode) + if (mode == PADCFG0_PMODE_GPIO) seq_puts(s, "GPIO "); else seq_printf(s, "mode %d ", mode); @@ -458,6 +459,11 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input) writel(value, padcfg0); } +static int intel_gpio_get_gpio_mode(void __iomem *padcfg0) +{ + return (readl(padcfg0) & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; +} + static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) { u32 value; @@ -491,7 +497,20 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, } padcfg0 = intel_get_padcfg(pctrl, pin, PADCFG0); + + /* + * If pin is already configured in GPIO mode, we assume that + * firmware provides correct settings. In such case we avoid + * potential glitches on the pin. Otherwise, for the pin in + * alternative mode, consumer has to supply respective flags. 
+ */ + if (intel_gpio_get_gpio_mode(padcfg0) == PADCFG0_PMODE_GPIO) { + raw_spin_unlock_irqrestore(&pctrl->lock, flags); + return 0; + } + intel_gpio_set_gpio_mode(padcfg0); + /* Disable TX buffer and enable RX (this will be input) */ __intel_gpio_set_direction(padcfg0, true); -- cgit v1.2.3 From 9110d1b0e229cebb1ffce0c04db2b22beffd513d Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Sun, 20 Oct 2019 17:30:06 +0200 Subject: ASoC: msm8916-wcd-analog: Fix RX1 selection in RDAC2 MUX According to the PM8916 Hardware Register Description, CDC_D_CDC_CONN_HPHR_DAC_CTL has only a single bit (RX_SEL) to switch between RX1 (0) and RX2 (1). It is not possible to disable it entirely to achieve the "ZERO" state. However, at the moment the "RDAC2 MUX" mixer defines three possible values ("ZERO", "RX2" and "RX1"). Setting the mixer to "ZERO" actually configures it to RX1. Setting the mixer to "RX1" has (seemingly) no effect. Remove "ZERO" and replace it with "RX1" to fix this. Fixes: 585e881e5b9e ("ASoC: codecs: Add msm8916-wcd analog codec") Signed-off-by: Stephan Gerhold Acked-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20191020153007.206070-1-stephan@gerhold.net Signed-off-by: Mark Brown --- sound/soc/codecs/msm8916-wcd-analog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c index 667e9f73aba3..e3d311fb510e 100644 --- a/sound/soc/codecs/msm8916-wcd-analog.c +++ b/sound/soc/codecs/msm8916-wcd-analog.c @@ -306,7 +306,7 @@ struct pm8916_wcd_analog_priv { }; static const char *const adc2_mux_text[] = { "ZERO", "INP2", "INP3" }; -static const char *const rdac2_mux_text[] = { "ZERO", "RX2", "RX1" }; +static const char *const rdac2_mux_text[] = { "RX1", "RX2" }; static const char *const hph_text[] = { "ZERO", "Switch", }; static const struct soc_enum hph_enum = SOC_ENUM_SINGLE_VIRT( @@ -321,7 +321,7 @@ static const struct soc_enum adc2_enum = SOC_ENUM_SINGLE_VIRT( /* 
RDAC2 MUX */ static const struct soc_enum rdac2_mux_enum = SOC_ENUM_SINGLE( - CDC_D_CDC_CONN_HPHR_DAC_CTL, 0, 3, rdac2_mux_text); + CDC_D_CDC_CONN_HPHR_DAC_CTL, 0, 2, rdac2_mux_text); static const struct snd_kcontrol_new spkr_switch[] = { SOC_DAPM_SINGLE("Switch", CDC_A_SPKR_DAC_CTL, 7, 1, 0) -- cgit v1.2.3 From d3645b055399538415586ebaacaedebc1e5899b0 Mon Sep 17 00:00:00 2001 From: Xiaojun Sang Date: Mon, 21 Oct 2019 10:54:32 +0100 Subject: ASoC: compress: fix unsigned integer overflow check Parameter fragments and fragment_size are type of u32. U32_MAX is the correct check. Signed-off-by: Xiaojun Sang Signed-off-by: Srinivas Kandagatla Acked-by: Vinod Koul Link: https://lore.kernel.org/r/20191021095432.5639-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/core/compress_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 99b882158705..942af8c29b79 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -528,7 +528,7 @@ static int snd_compress_check_input(struct snd_compr_params *params) { /* first let's check the buffer parameter's */ if (params->buffer.fragment_size == 0 || - params->buffer.fragments > INT_MAX / params->buffer.fragment_size || + params->buffer.fragments > U32_MAX / params->buffer.fragment_size || params->buffer.fragments == 0) return -EINVAL; -- cgit v1.2.3 From 6c26f71759a6efc04b888dd2c1cc4f1cac38cdf0 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 Oct 2019 15:57:07 +0200 Subject: fuse: don't advise readdirplus for negative lookup If the FUSE_READDIRPLUS_AUTO feature is enabled, then lookups on a directory before/during readdir are used as an indication that READDIRPLUS should be used instead of READDIR. However if the lookup turns out to be negative, then selecting READDIRPLUS makes no sense. 
Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index d572c900bb0f..b77954a27538 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -405,7 +405,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, else fuse_invalidate_entry_cache(entry); - fuse_advise_use_readdirplus(dir); + if (inode) + fuse_advise_use_readdirplus(dir); return newent; out_iput: -- cgit v1.2.3 From 51fecdd2555b3e0e05a78d30093c638d164a32f9 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 15 Oct 2019 13:46:22 -0400 Subject: virtiofs: Do not end request in submission context Submission context can hold some locks which end request code tries to hold again and deadlock can occur. For example, fc->bg_lock. If a background request is being submitted, it might hold fc->bg_lock and if we could not submit request (because device went away) and tried to end request, then deadlock happens. During testing, I also got a warning from deadlock detection code. So put requests on a list and end requests from a worker thread. I got following warning from deadlock detector. 
[ 603.137138] WARNING: possible recursive locking detected [ 603.137142] -------------------------------------------- [ 603.137144] blogbench/2036 is trying to acquire lock: [ 603.137149] 00000000f0f51107 (&(&fc->bg_lock)->rlock){+.+.}, at: fuse_request_end+0xdf/0x1c0 [fuse] [ 603.140701] [ 603.140701] but task is already holding lock: [ 603.140703] 00000000f0f51107 (&(&fc->bg_lock)->rlock){+.+.}, at: fuse_simple_background+0x92/0x1d0 [fuse] [ 603.140713] [ 603.140713] other info that might help us debug this: [ 603.140714] Possible unsafe locking scenario: [ 603.140714] [ 603.140715] CPU0 [ 603.140716] ---- [ 603.140716] lock(&(&fc->bg_lock)->rlock); [ 603.140718] lock(&(&fc->bg_lock)->rlock); [ 603.140719] [ 603.140719] *** DEADLOCK *** Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/fuse/virtio_fs.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index e22a0c003c3d..7ea58606cc1d 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -30,6 +30,7 @@ struct virtio_fs_vq { struct virtqueue *vq; /* protected by ->lock */ struct work_struct done_work; struct list_head queued_reqs; + struct list_head end_reqs; /* End these requests */ struct delayed_work dispatch_work; struct fuse_dev *fud; bool connected; @@ -259,8 +260,27 @@ static void virtio_fs_hiprio_done_work(struct work_struct *work) spin_unlock(&fsvq->lock); } -static void virtio_fs_dummy_dispatch_work(struct work_struct *work) +static void virtio_fs_request_dispatch_work(struct work_struct *work) { + struct fuse_req *req; + struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, + dispatch_work.work); + struct fuse_conn *fc = fsvq->fud->fc; + + pr_debug("virtio-fs: worker %s called.\n", __func__); + while (1) { + spin_lock(&fsvq->lock); + req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req, + list); + if (!req) { + spin_unlock(&fsvq->lock); + return; + } + + 
list_del_init(&req->list); + spin_unlock(&fsvq->lock); + fuse_request_end(fc, req); + } } static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) @@ -502,6 +522,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work); INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs); + INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs); INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work, virtio_fs_hiprio_dispatch_work); spin_lock_init(&fs->vqs[VQ_HIPRIO].lock); @@ -511,8 +532,9 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, spin_lock_init(&fs->vqs[i].lock); INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work); INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work, - virtio_fs_dummy_dispatch_work); + virtio_fs_request_dispatch_work); INIT_LIST_HEAD(&fs->vqs[i].queued_reqs); + INIT_LIST_HEAD(&fs->vqs[i].end_reqs); snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name), "requests.%u", i - VQ_REQUEST); callbacks[i] = virtio_fs_vq_done; @@ -918,6 +940,7 @@ __releases(fiq->lock) struct fuse_conn *fc; struct fuse_req *req; struct fuse_pqueue *fpq; + struct virtio_fs_vq *fsvq; int ret; WARN_ON(list_empty(&fiq->pending)); @@ -951,7 +974,8 @@ __releases(fiq->lock) smp_mb__after_atomic(); retry: - ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req); + fsvq = &fs->vqs[queue_id]; + ret = virtio_fs_enqueue_req(fsvq, req); if (ret < 0) { if (ret == -ENOMEM || ret == -ENOSPC) { /* Virtqueue full. Retry submission */ @@ -965,7 +989,12 @@ retry: clear_bit(FR_SENT, &req->flags); list_del_init(&req->list); spin_unlock(&fpq->lock); - fuse_request_end(fc, req); + + /* Can't end request in submission context. 
Use a worker */ + spin_lock(&fsvq->lock); + list_add_tail(&req->list, &fsvq->end_reqs); + schedule_delayed_work(&fsvq->dispatch_work, 0); + spin_unlock(&fsvq->lock); return; } } -- cgit v1.2.3 From 7ee1e2e631dbf0ff0df2a67a1e01ba3c1dce7a46 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 15 Oct 2019 13:46:23 -0400 Subject: virtiofs: No need to check fpq->connected state In virtiofs we keep per queue connected state in virtio_fs_vq->connected and use that to end request if queue is not connected. And virtiofs does not even touch fpq->connected state. We probably need to merge these two at some point of time. For now, simplify the code a bit and do not worry about checking state of fpq->connected. Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/fuse/virtio_fs.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 7ea58606cc1d..a2724b77221d 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -960,13 +960,6 @@ __releases(fiq->lock) fpq = &fs->vqs[queue_id].fud->pq; spin_lock(&fpq->lock); - if (!fpq->connected) { - spin_unlock(&fpq->lock); - req->out.h.error = -ENODEV; - pr_err("virtio-fs: %s disconnected\n", __func__); - fuse_request_end(fc, req); - return; - } list_add_tail(&req->list, fpq->processing); spin_unlock(&fpq->lock); set_bit(FR_SENT, &req->flags); -- cgit v1.2.3 From 5dbe190f341206a7896f7e40c1e3a36933d812f3 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 15 Oct 2019 13:46:24 -0400 Subject: virtiofs: Set FR_SENT flag only after request has been sent FR_SENT flag should be set when request has been sent successfully sent over virtqueue. This is used by interrupt logic to figure out if interrupt request should be sent or not. Also add it to fqp->processing list after sending it successfully. 
Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/fuse/virtio_fs.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index a2724b77221d..6d153e70c87b 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -857,6 +857,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, unsigned int i; int ret; bool notify; + struct fuse_pqueue *fpq; /* Does the sglist fit on the stack? */ total_sgs = sg_count_fuse_req(req); @@ -911,6 +912,15 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, goto out; } + /* Request successfully sent. */ + fpq = &fsvq->fud->pq; + spin_lock(&fpq->lock); + list_add_tail(&req->list, fpq->processing); + spin_unlock(&fpq->lock); + set_bit(FR_SENT, &req->flags); + /* matches barrier in request_wait_answer() */ + smp_mb__after_atomic(); + fsvq->in_flight++; notify = virtqueue_kick_prepare(vq); @@ -939,7 +949,6 @@ __releases(fiq->lock) struct virtio_fs *fs; struct fuse_conn *fc; struct fuse_req *req; - struct fuse_pqueue *fpq; struct virtio_fs_vq *fsvq; int ret; @@ -958,14 +967,6 @@ __releases(fiq->lock) req->in.h.nodeid, req->in.h.len, fuse_len_args(req->args->out_numargs, req->args->out_args)); - fpq = &fs->vqs[queue_id].fud->pq; - spin_lock(&fpq->lock); - list_add_tail(&req->list, fpq->processing); - spin_unlock(&fpq->lock); - set_bit(FR_SENT, &req->flags); - /* matches barrier in request_wait_answer() */ - smp_mb__after_atomic(); - retry: fsvq = &fs->vqs[queue_id]; ret = virtio_fs_enqueue_req(fsvq, req); @@ -978,10 +979,6 @@ retry: } req->out.h.error = ret; pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); - spin_lock(&fpq->lock); - clear_bit(FR_SENT, &req->flags); - list_del_init(&req->list); - spin_unlock(&fpq->lock); /* Can't end request in submission context. 
Use a worker */ spin_lock(&fsvq->lock); -- cgit v1.2.3 From c17ea009610366146ec409fd6dc277e0f2510b10 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 15 Oct 2019 13:46:25 -0400 Subject: virtiofs: Count pending forgets as in_flight forgets If virtqueue is full, we put forget requests on a list and these forgets are dispatched later using a worker. As of now we don't count these forgets in fsvq->in_flight variable. This means when queue is being drained, we have to have special logic to first drain these pending requests and then wait for fsvq->in_flight to go to zero. By counting pending forgets in fsvq->in_flight, we can get rid of special logic and just wait for in_flight to go to zero. Worker thread will kick and drain all the forgets anyway, leading in_flight to zero. I also need similar logic for normal request queue in next patch where I am about to defer request submission in the worker context if queue is full. This simplifies the code a bit. Also add two helper functions to inc/dec in_flight. Decrement in_flight helper will later used to call completion when in_flight reaches zero. Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/fuse/virtio_fs.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 6d153e70c87b..3ea613d5e34f 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -67,6 +67,19 @@ static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq) return &vq_to_fsvq(vq)->fud->pq; } +/* Should be called with fsvq->lock held. */ +static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq) +{ + fsvq->in_flight++; +} + +/* Should be called with fsvq->lock held. 
*/ +static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq) +{ + WARN_ON(fsvq->in_flight <= 0); + fsvq->in_flight--; +} + static void release_virtio_fs_obj(struct kref *ref) { struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount); @@ -110,22 +123,6 @@ static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) flush_delayed_work(&fsvq->dispatch_work); } -static inline void drain_hiprio_queued_reqs(struct virtio_fs_vq *fsvq) -{ - struct virtio_fs_forget *forget; - - spin_lock(&fsvq->lock); - while (1) { - forget = list_first_entry_or_null(&fsvq->queued_reqs, - struct virtio_fs_forget, list); - if (!forget) - break; - list_del(&forget->list); - kfree(forget); - } - spin_unlock(&fsvq->lock); -} - static void virtio_fs_drain_all_queues(struct virtio_fs *fs) { struct virtio_fs_vq *fsvq; @@ -133,9 +130,6 @@ static void virtio_fs_drain_all_queues(struct virtio_fs *fs) for (i = 0; i < fs->nvqs; i++) { fsvq = &fs->vqs[i]; - if (i == VQ_HIPRIO) - drain_hiprio_queued_reqs(fsvq); - virtio_fs_drain_queue(fsvq); } } @@ -254,7 +248,7 @@ static void virtio_fs_hiprio_done_work(struct work_struct *work) while ((req = virtqueue_get_buf(vq, &len)) != NULL) { kfree(req); - fsvq->in_flight--; + dec_in_flight_req(fsvq); } } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); spin_unlock(&fsvq->lock); @@ -306,6 +300,7 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) list_del(&forget->list); if (!fsvq->connected) { + dec_in_flight_req(fsvq); spin_unlock(&fsvq->lock); kfree(forget); continue; @@ -327,13 +322,13 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) } else { pr_debug("virtio-fs: Could not queue FORGET: err=%d. 
Dropping it.\n", ret); + dec_in_flight_req(fsvq); kfree(forget); } spin_unlock(&fsvq->lock); return; } - fsvq->in_flight++; notify = virtqueue_kick_prepare(vq); spin_unlock(&fsvq->lock); @@ -472,7 +467,7 @@ static void virtio_fs_requests_done_work(struct work_struct *work) fuse_request_end(fc, req); spin_lock(&fsvq->lock); - fsvq->in_flight--; + dec_in_flight_req(fsvq); spin_unlock(&fsvq->lock); } } @@ -730,6 +725,7 @@ __releases(fiq->lock) list_add_tail(&forget->list, &fsvq->queued_reqs); schedule_delayed_work(&fsvq->dispatch_work, msecs_to_jiffies(1)); + inc_in_flight_req(fsvq); } else { pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n", ret); @@ -739,7 +735,7 @@ __releases(fiq->lock) goto out; } - fsvq->in_flight++; + inc_in_flight_req(fsvq); notify = virtqueue_kick_prepare(vq); spin_unlock(&fsvq->lock); @@ -921,7 +917,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, /* matches barrier in request_wait_answer() */ smp_mb__after_atomic(); - fsvq->in_flight++; + inc_in_flight_req(fsvq); notify = virtqueue_kick_prepare(vq); spin_unlock(&fsvq->lock); -- cgit v1.2.3 From a9bfd9dd3417561d06c81de04f6d6c1e0c9b3d44 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 15 Oct 2019 13:46:26 -0400 Subject: virtiofs: Retry request submission from worker context If regular request queue gets full, currently we sleep for a bit and retrying submission in submitter's context. This assumes submitter is not holding any spin lock. But this assumption is not true for background requests. For background requests, we are called with fc->bg_lock held. This can lead to deadlock where one thread is trying submission with fc->bg_lock held while request completion thread has called fuse_request_end() which tries to acquire fc->bg_lock and gets blocked. As request completion thread gets blocked, it does not make further progress and that means queue does not get empty and submitter can't submit more requests. 
To solve this issue, retry submission with the help of a worker, instead of retrying in submitter's context. We already do this for hiprio/forget requests. Reported-by: Chirantan Ekbote Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/fuse/virtio_fs.c | 61 +++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 52 insertions(+), 9 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 3ea613d5e34f..2de8fc0d6a24 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -55,6 +55,9 @@ struct virtio_fs_forget { struct list_head list; }; +static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, + struct fuse_req *req, bool in_flight); + static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) { struct virtio_fs *fs = vq->vdev->priv; @@ -260,6 +263,7 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work) struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, dispatch_work.work); struct fuse_conn *fc = fsvq->fud->fc; + int ret; pr_debug("virtio-fs: worker %s called.\n", __func__); while (1) { @@ -268,13 +272,45 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work) list); if (!req) { spin_unlock(&fsvq->lock); - return; + break; } list_del_init(&req->list); spin_unlock(&fsvq->lock); fuse_request_end(fc, req); } + + /* Dispatch pending requests */ + while (1) { + spin_lock(&fsvq->lock); + req = list_first_entry_or_null(&fsvq->queued_reqs, + struct fuse_req, list); + if (!req) { + spin_unlock(&fsvq->lock); + return; + } + list_del_init(&req->list); + spin_unlock(&fsvq->lock); + + ret = virtio_fs_enqueue_req(fsvq, req, true); + if (ret < 0) { + if (ret == -ENOMEM || ret == -ENOSPC) { + spin_lock(&fsvq->lock); + list_add_tail(&req->list, &fsvq->queued_reqs); + schedule_delayed_work(&fsvq->dispatch_work, + msecs_to_jiffies(1)); + spin_unlock(&fsvq->lock); + return; + } + req->out.h.error = ret; + spin_lock(&fsvq->lock); + dec_in_flight_req(fsvq); + 
spin_unlock(&fsvq->lock); + pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", + ret); + fuse_request_end(fc, req); + } + } } static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) @@ -837,7 +873,7 @@ static unsigned int sg_init_fuse_args(struct scatterlist *sg, /* Add a request to a virtqueue and kick the device */ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, - struct fuse_req *req) + struct fuse_req *req, bool in_flight) { /* requests need at least 4 elements */ struct scatterlist *stack_sgs[6]; @@ -917,7 +953,8 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, /* matches barrier in request_wait_answer() */ smp_mb__after_atomic(); - inc_in_flight_req(fsvq); + if (!in_flight) + inc_in_flight_req(fsvq); notify = virtqueue_kick_prepare(vq); spin_unlock(&fsvq->lock); @@ -963,15 +1000,21 @@ __releases(fiq->lock) req->in.h.nodeid, req->in.h.len, fuse_len_args(req->args->out_numargs, req->args->out_args)); -retry: fsvq = &fs->vqs[queue_id]; - ret = virtio_fs_enqueue_req(fsvq, req); + ret = virtio_fs_enqueue_req(fsvq, req, false); if (ret < 0) { if (ret == -ENOMEM || ret == -ENOSPC) { - /* Virtqueue full. Retry submission */ - /* TODO use completion instead of timeout */ - usleep_range(20, 30); - goto retry; + /* + * Virtqueue full. Retry submission from worker + * context as we might be holding fc->bg_lock. 
+ */ + spin_lock(&fsvq->lock); + list_add_tail(&req->list, &fsvq->queued_reqs); + inc_in_flight_req(fsvq); + schedule_delayed_work(&fsvq->dispatch_work, + msecs_to_jiffies(1)); + spin_unlock(&fsvq->lock); + return; } req->out.h.error = ret; pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); -- cgit v1.2.3 From 63bdef6cd6941917c823b9cc9aa0219d19fcb716 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 18 Oct 2019 11:08:42 +0200 Subject: pinctrl: cherryview: Fix irq_valid_mask calculation Commit 03c4749dd6c7 ("gpio / ACPI: Drop unnecessary ACPI GPIO to Linux GPIO translation") has made the cherryview gpio numbers sparse, to get a 1:1 mapping between ACPI pin numbers and gpio numbers in Linux. This has greatly simplified things, but the code setting the irq_valid_mask was not updated for this, so the valid mask is still in the old "compressed" numbering with the gaps in the pin numbers skipped, which is wrong as irq_valid_mask needs to be expressed in gpio numbers. This results in the following error on devices using pin 24 (0x0018) on the north GPIO controller as an ACPI event source: [ 0.422452] cherryview-pinctrl INT33FF:01: Failed to translate GPIO to IRQ This has been reported (by email) to be happening on a Caterpillar CAT T20 tablet and I've reproduced this myself on a Medion Akoya e2215t 2-in-1. This commit uses the pin number instead of the compressed index into community->pins to clear the correct bits in irq_valid_mask for GPIOs using GPEs for interrupts, fixing these errors and in case of the Medion Akoya e2215t also fixing the LID switch not working. 
Cc: stable@vger.kernel.org Fixes: 03c4749dd6c7 ("gpio / ACPI: Drop unnecessary ACPI GPIO to Linux GPIO translation") Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Mika Westerberg --- drivers/pinctrl/intel/pinctrl-cherryview.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index c6251eac8946..c31266e70559 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1559,7 +1559,7 @@ static void chv_init_irq_valid_mask(struct gpio_chip *chip, intsel >>= CHV_PADCTRL0_INTSEL_SHIFT; if (intsel >= community->nirqs) - clear_bit(i, valid_mask); + clear_bit(desc->number, valid_mask); } } -- cgit v1.2.3 From 40a6b9a00930fd6b59aa2eb6135abc2efe5440c3 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 21 Oct 2019 12:41:40 +0200 Subject: Revert "pwm: Let pwm_get_state() return the last implemented state" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It turns out that commit 01ccf903edd6 ("pwm: Let pwm_get_state() return the last implemented state") causes backlight failures on a number of boards. The reason is that some of the drivers do not write the full state through to the hardware registers, which means that ->get_state() subsequently does not return the correct state. Consumers which rely on pwm_get_state() returning the current state will therefore get confused and subsequently try to program a bad state. Before this change can be made, existing drivers need to be more carefully audited and fixed to behave as the framework expects. Until then, keep the original behaviour of returning the software state that was applied rather than reading the state back from hardware. 
Reviewed-by: Uwe Kleine-König Tested-by: Enric Balletbo i Serra Tested-by: Michal Vokáč Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 6ad51aa60c03..f877e77d9184 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -472,14 +472,7 @@ int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state) if (err) return err; - /* - * .apply might have to round some values in *state, if possible - * read the actually implemented value back. - */ - if (chip->ops->get_state) - chip->ops->get_state(chip, pwm, &pwm->state); - else - pwm->state = *state; + pwm->state = *state; } else { /* * FIXME: restore the initial state in case of error. -- cgit v1.2.3 From e7a409c3f46cb0dbc7bfd4f6f9421d53e92614a5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 19 Oct 2019 09:26:37 -0700 Subject: ipv4: fix IPSKB_FRAG_PMTU handling with fragmentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch removes the iph field from the state structure, which is not properly initialized. Instead, add a new field to make the "do we want to set DF" be the state bit and move the code to set the DF flag from ip_frag_next(). Joint work with Pablo and Linus. Fixes: 19c3401a917b ("net: ipv4: place control buffer handling away from fragmentation iterators") Reported-by: Patrick Schönthaler Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso Signed-off-by: Linus Torvalds Signed-off-by: David S. 
Miller --- include/net/ip.h | 4 ++-- net/bridge/netfilter/nf_conntrack_bridge.c | 2 +- net/ipv4/ip_output.c | 11 ++++++----- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 95bb77f95bcc..a2c61c36dc4a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -185,7 +185,7 @@ static inline struct sk_buff *ip_fraglist_next(struct ip_fraglist_iter *iter) } struct ip_frag_state { - struct iphdr *iph; + bool DF; unsigned int hlen; unsigned int ll_rs; unsigned int mtu; @@ -196,7 +196,7 @@ struct ip_frag_state { }; void ip_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int ll_rs, - unsigned int mtu, struct ip_frag_state *state); + unsigned int mtu, bool DF, struct ip_frag_state *state); struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state); diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 506d6141e44e..809673222382 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -95,7 +95,7 @@ slow_path: * This may also be a clone skbuff, we could preserve the geometry for * the copies but probably not worth the effort. 
*/ - ip_frag_init(skb, hlen, ll_rs, frag_max_size, &state); + ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state); while (state.left > 0) { struct sk_buff *skb2; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 814b9b8882a0..3d8baaaf7086 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -645,11 +645,12 @@ void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter) EXPORT_SYMBOL(ip_fraglist_prepare); void ip_frag_init(struct sk_buff *skb, unsigned int hlen, - unsigned int ll_rs, unsigned int mtu, + unsigned int ll_rs, unsigned int mtu, bool DF, struct ip_frag_state *state) { struct iphdr *iph = ip_hdr(skb); + state->DF = DF; state->hlen = hlen; state->ll_rs = ll_rs; state->mtu = mtu; @@ -668,9 +669,6 @@ static void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to, /* Copy the flags to each fragment. */ IPCB(to)->flags = IPCB(from)->flags; - if (IPCB(from)->flags & IPSKB_FRAG_PMTU) - state->iph->frag_off |= htons(IP_DF); - /* ANK: dirty, but effective trick. Upgrade options only if * the segment to be fragmented was THE FIRST (otherwise, * options are already fixed) and make it ONCE @@ -738,6 +736,8 @@ struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state) */ iph = ip_hdr(skb2); iph->frag_off = htons((state->offset >> 3)); + if (state->DF) + iph->frag_off |= htons(IP_DF); /* * Added AC : If we are fragmenting a fragment that's not the @@ -883,7 +883,8 @@ slow_path: * Fragment the datagram. */ - ip_frag_init(skb, hlen, ll_rs, mtu, &state); + ip_frag_init(skb, hlen, ll_rs, mtu, IPCB(skb)->flags & IPSKB_FRAG_PMTU, + &state); /* * Keep copying data until we run out. -- cgit v1.2.3 From b3060531979422d5bb18d80226f978910284dc70 Mon Sep 17 00:00:00 2001 From: Kazutoshi Noguchi Date: Mon, 21 Oct 2019 00:03:07 +0900 Subject: r8152: add device id for Lenovo ThinkPad USB-C Dock Gen 2 This device is sold as 'ThinkPad USB-C Dock Gen 2 (40AS)'. Chipset is RTL8153 and works with r8152. 
Without this, the generic cdc_ether grabs the device, and the device jam connected networks up when the machine suspends. Signed-off-by: Kazutoshi Noguchi Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 7 +++++++ drivers/net/usb/r8152.c | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 32f53de5b1fe..fe630438f67b 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -787,6 +787,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* ThinkPad USB-C Dock Gen 2 (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0xa387, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* NVIDIA Tegra USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(NVIDIA_VENDOR_ID, 0x09ff, USB_CLASS_COMM, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index cee9fef925cd..d4a95b50bda6 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5755,6 +5755,7 @@ static const struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x720c)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7214)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0xa387)}, {REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)}, {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, {REALTEK_USB_DEVICE(VENDOR_ID_TPLINK, 0x0601)}, -- cgit v1.2.3 From ce197d83a9fc42795c248c90983bf05faf0f013b Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Sat, 19 Oct 2019 13:19:31 +0200 Subject: xdp: Handle device unregister for devmap_hash map type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems I forgot to add handling of devmap_hash type maps to the device unregister hook for devmaps. This omission causes devices to not be properly released, which causes hangs. 
Fix this by adding the missing handler. Fixes: 6f9d451ab1a3 ("xdp: Add devmap_hash map type for looking up devices by hashed index") Reported-by: Tetsuo Handa Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191019111931.2981954-1-toke@redhat.com --- kernel/bpf/devmap.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index c0a48f336997..3867864cdc2f 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -719,6 +719,32 @@ const struct bpf_map_ops dev_map_hash_ops = { .map_check_btf = map_check_no_btf, }; +static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab, + struct net_device *netdev) +{ + unsigned long flags; + u32 i; + + spin_lock_irqsave(&dtab->index_lock, flags); + for (i = 0; i < dtab->n_buckets; i++) { + struct bpf_dtab_netdev *dev; + struct hlist_head *head; + struct hlist_node *next; + + head = dev_map_index_hash(dtab, i); + + hlist_for_each_entry_safe(dev, next, head, index_hlist) { + if (netdev != dev->dev) + continue; + + dtab->items--; + hlist_del_rcu(&dev->index_hlist); + call_rcu(&dev->rcu, __dev_map_entry_free); + } + } + spin_unlock_irqrestore(&dtab->index_lock, flags); +} + static int dev_map_notification(struct notifier_block *notifier, ulong event, void *ptr) { @@ -735,6 +761,11 @@ static int dev_map_notification(struct notifier_block *notifier, */ rcu_read_lock(); list_for_each_entry_rcu(dtab, &dev_map_list, list) { + if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) { + dev_map_hash_remove_netdev(dtab, netdev); + continue; + } + for (i = 0; i < dtab->map.max_entries; i++) { struct bpf_dtab_netdev *dev, *odev; -- cgit v1.2.3 From 532f9cd6ee994ed10403e856ca27501428048597 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Tue, 22 Oct 2019 14:06:23 +0530 Subject: reset: Fix memory leak in reset_control_array_put() Memory allocated for 'struct 
reset_control_array' in of_reset_control_array_get() is never freed in reset_control_array_put() resulting in kmemleak showing the following backtrace. backtrace: [<00000000c5f17595>] __kmalloc+0x1b0/0x2b0 [<00000000bd499e13>] of_reset_control_array_get+0xa4/0x180 [<000000004cc02754>] 0xffff800008c669e4 [<0000000050a83b24>] platform_drv_probe+0x50/0xa0 [<00000000d3a0b0bc>] really_probe+0x108/0x348 [<000000005aa458ac>] driver_probe_device+0x58/0x100 [<000000008853626c>] device_driver_attach+0x6c/0x90 [<0000000085308d19>] __driver_attach+0x84/0xc8 [<00000000080d35f2>] bus_for_each_dev+0x74/0xc8 [<00000000dd7f015b>] driver_attach+0x20/0x28 [<00000000923ba6e6>] bus_add_driver+0x148/0x1f0 [<0000000061473b66>] driver_register+0x60/0x110 [<00000000c5bec167>] __platform_driver_register+0x40/0x48 [<000000007c764b4f>] 0xffff800008c6c020 [<0000000047ec2e8c>] do_one_initcall+0x5c/0x1b0 [<0000000093d4b50d>] do_init_module+0x54/0x1d0 Fixes: 17c82e206d2a ("reset: Add APIs to manage array of resets") Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Philipp Zabel --- drivers/reset/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/reset/core.c b/drivers/reset/core.c index 213ff40dda11..36b1ff69b1e2 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -748,6 +748,7 @@ static void reset_control_array_put(struct reset_control_array *resets) for (i = 0; i < resets->num_rstcs; i++) __reset_control_put_internal(resets->rstc[i]); mutex_unlock(&reset_list_mutex); + kfree(resets); } /** -- cgit v1.2.3 From 6e82ae6b8d11b948b74e71396efd8e074c415f44 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Maneyrol Date: Wed, 16 Oct 2019 14:43:28 +0000 Subject: iio: imu: inv_mpu6050: fix no data on MPU6050 Some chips have a fifo overflow bit issue where the bit is always set. The result is that every data is dropped. Change fifo overflow management by checking fifo count against a maximum value. Add fifo size in chip hardware set of values. 
Fixes: f5057e7b2dba ("iio: imu: inv_mpu6050: better fifo overflow handling") Cc: stable@vger.kernel.org Signed-off-by: Jean-Baptiste Maneyrol Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_mpu6050/inv_mpu_core.c | 9 +++++++++ drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h | 2 ++ drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c | 15 ++++++++++++--- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c index b17f060b52fc..868281b8adb0 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c @@ -114,54 +114,63 @@ static const struct inv_mpu6050_hw hw_info[] = { .name = "MPU6050", .reg = &reg_set_6050, .config = &chip_config_6050, + .fifo_size = 1024, }, { .whoami = INV_MPU6500_WHOAMI_VALUE, .name = "MPU6500", .reg = &reg_set_6500, .config = &chip_config_6050, + .fifo_size = 512, }, { .whoami = INV_MPU6515_WHOAMI_VALUE, .name = "MPU6515", .reg = &reg_set_6500, .config = &chip_config_6050, + .fifo_size = 512, }, { .whoami = INV_MPU6000_WHOAMI_VALUE, .name = "MPU6000", .reg = &reg_set_6050, .config = &chip_config_6050, + .fifo_size = 1024, }, { .whoami = INV_MPU9150_WHOAMI_VALUE, .name = "MPU9150", .reg = &reg_set_6050, .config = &chip_config_6050, + .fifo_size = 1024, }, { .whoami = INV_MPU9250_WHOAMI_VALUE, .name = "MPU9250", .reg = &reg_set_6500, .config = &chip_config_6050, + .fifo_size = 512, }, { .whoami = INV_MPU9255_WHOAMI_VALUE, .name = "MPU9255", .reg = &reg_set_6500, .config = &chip_config_6050, + .fifo_size = 512, }, { .whoami = INV_ICM20608_WHOAMI_VALUE, .name = "ICM20608", .reg = &reg_set_6500, .config = &chip_config_6050, + .fifo_size = 512, }, { .whoami = INV_ICM20602_WHOAMI_VALUE, .name = "ICM20602", .reg = &reg_set_icm20602, .config = &chip_config_6050, + .fifo_size = 1008, }, }; diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h index db1c6904388b..51235677c534 100644 ---
a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h @@ -100,12 +100,14 @@ struct inv_mpu6050_chip_config { * @name: name of the chip. * @reg: register map of the chip. * @config: configuration of the chip. + * @fifo_size: size of the FIFO in bytes. */ struct inv_mpu6050_hw { u8 whoami; u8 *name; const struct inv_mpu6050_reg_map *reg; const struct inv_mpu6050_chip_config *config; + size_t fifo_size; }; /* diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c index 5f9a5de0bab4..72d8c5790076 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c @@ -180,9 +180,6 @@ irqreturn_t inv_mpu6050_read_fifo(int irq, void *p) "failed to ack interrupt\n"); goto flush_fifo; } - /* handle fifo overflow by reseting fifo */ - if (int_status & INV_MPU6050_BIT_FIFO_OVERFLOW_INT) - goto flush_fifo; if (!(int_status & INV_MPU6050_BIT_RAW_DATA_RDY_INT)) { dev_warn(regmap_get_device(st->map), "spurious interrupt with status 0x%x\n", int_status); @@ -211,6 +208,18 @@ irqreturn_t inv_mpu6050_read_fifo(int irq, void *p) if (result) goto end_session; fifo_count = get_unaligned_be16(&data[0]); + + /* + * Handle fifo overflow by resetting fifo. + * Reset if there is only 3 data set free remaining to mitigate + * possible delay between reading fifo count and fifo data. 
+ */ + nb = 3 * bytes_per_datum; + if (fifo_count >= st->hw->fifo_size - nb) { + dev_warn(regmap_get_device(st->map), "fifo overflow reset\n"); + goto flush_fifo; + } + /* compute and process all complete datum */ nb = fifo_count / bytes_per_datum; inv_mpu6050_update_period(st, pf->timestamp, nb); -- cgit v1.2.3 From 4a6a6f5c4aeedb72db871d60bfcca89835f317aa Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 26 Sep 2019 19:16:41 -0600 Subject: tools: gpio: Use !building_out_of_srctree to determine srctree make TARGETS=gpio kselftest fails with: Makefile:23: tools/build/Makefile.include: No such file or directory When the gpio tool make is invoked from tools Makefile, srctree is cleared and the current logic checks for srctree equals to empty string to determine srctree location from CURDIR. When the build is invoked from selftests/gpio Makefile, the srctree is set to "." and the same logic used for srctree equals to empty is needed to determine srctree. Check building_out_of_srctree undefined as the condition for both cases to fix "make TARGETS=gpio kselftest" build failure. Cc: stable@vger.kernel.org Signed-off-by: Shuah Khan Signed-off-by: Bartosz Golaszewski --- tools/gpio/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile index 6ecdd1067826..1178d302757e 100644 --- a/tools/gpio/Makefile +++ b/tools/gpio/Makefile @@ -3,7 +3,11 @@ include ../scripts/Makefile.include bindir ?= /usr/bin -ifeq ($(srctree),) +# This will work when gpio is built in tools env. where srctree +# isn't set and when invoked from selftests build, where srctree +# is set to ".".
building_out_of_srctree is undefined for in srctree +# builds +ifndef building_out_of_srctree srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) endif -- cgit v1.2.3 From 8ca8fa7f22dcb0a3265490a690b0c3e27de681f9 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Fri, 18 Oct 2019 14:11:25 +0300 Subject: ARC: [plat-hsdk]: Enable on-board SPI NOR flash IC HSDK board has sst26wf016b SPI NOR flash IC installed, enable it. Acked-by: Alexey Brodkin Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 8 ++++++++ arch/arc/configs/hsdk_defconfig | 2 ++ 2 files changed, 10 insertions(+) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index bfc7f5f5d6f2..9bea5daadd23 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -264,6 +264,14 @@ clocks = <&input_clk>; cs-gpios = <&creg_gpio 0 GPIO_ACTIVE_LOW>, <&creg_gpio 1 GPIO_ACTIVE_LOW>; + + spi-flash@0 { + compatible = "sst26wf016b", "jedec,spi-nor"; + reg = <0>; + #address-cells = <1>; + #size-cells = <1>; + spi-max-frequency = <4000000>; + }; }; creg_gpio: gpio@14b0 { diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 9b9a74444ce2..22fc70396a3b 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -32,6 +32,8 @@ CONFIG_INET=y CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set +CONFIG_MTD=y +CONFIG_MTD_SPI_NOR=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_NETDEVICES=y -- cgit v1.2.3 From ab563bf54a4d08cb59e7d7bcd419f7e8558a4964 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Fri, 18 Oct 2019 14:11:26 +0300 Subject: ARC: [plat-hsdk]: Enable on-board SPI ADC IC HSDK board has adc108s102 SPI ADC IC installed, enable it.
Acked-by: Alexey Brodkin Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 15 +++++++++++++++ arch/arc/configs/hsdk_defconfig | 4 ++++ 2 files changed, 19 insertions(+) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 9bea5daadd23..9acbeba832c0 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -65,6 +65,14 @@ clock-frequency = <33333333>; }; + reg_5v0: regulator-5v0 { + compatible = "regulator-fixed"; + + regulator-name = "5v0-supply"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + }; + cpu_intc: cpu-interrupt-controller { compatible = "snps,archs-intc"; interrupt-controller; @@ -272,6 +280,13 @@ #size-cells = <1>; spi-max-frequency = <4000000>; }; + + adc@1 { + compatible = "ti,adc108s102"; + reg = <1>; + vref-supply = <&reg_5v0>; + spi-max-frequency = <1000000>; + }; }; creg_gpio: gpio@14b0 { diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 22fc70396a3b..0974226fab55 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -57,6 +57,8 @@ CONFIG_GPIO_SYSFS=y CONFIG_GPIO_DWAPB=y CONFIG_GPIO_SNPS_CREG=y # CONFIG_HWMON is not set +CONFIG_REGULATOR=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_DRM=y # CONFIG_DRM_FBDEV_EMULATION is not set CONFIG_DRM_UDL=y @@ -74,6 +76,8 @@ CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y CONFIG_DMADEVICES=y CONFIG_DW_AXI_DMAC=y +CONFIG_IIO=y +CONFIG_TI_ADC108S102=y CONFIG_EXT3_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y -- cgit v1.2.3 From 5effc09c4907901f0e71e68e5f2e14211d9a203f Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Tue, 22 Oct 2019 17:04:11 +0300 Subject: ARC: perf: Accommodate big-endian CPU 8-letter strings representing ARC perf events are stored in two 32-bit registers as ASCII characters like that: "IJMP", "IALL", "IJMPTAK" etc. And the same order of bytes in the word is used regardless of CPU endianness.
Which means in case of big-endian CPU core we need to swap bytes to get the same order as if it was on little-endian CPU. Otherwise we're seeing the following error message on boot: ------------------------->8---------------------- ARC perf : 8 counters (32 bits), 40 conditions, [overflow IRQ support] sysfs: cannot create duplicate filename '/devices/arc_pct/events/pmji' CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.2.18 #3 Stack Trace: arc_unwind_core+0xd4/0xfc dump_stack+0x64/0x80 sysfs_warn_dup+0x46/0x58 sysfs_add_file_mode_ns+0xb2/0x168 create_files+0x70/0x2a0 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at kernel/events/core.c:12144 perf_event_sysfs_init+0x70/0xa0 Failed to register pmu: arc_pct, reason -17 Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.2.18 #3 Stack Trace: arc_unwind_core+0xd4/0xfc dump_stack+0x64/0x80 __warn+0x9c/0xd4 warn_slowpath_fmt+0x22/0x2c perf_event_sysfs_init+0x70/0xa0 ---[ end trace a75fb9a9837bd1ec ]--- ------------------------->8---------------------- What happens here we're trying to register more than one raw perf event with the same name "PMJI". Why? Because ARC perf events are 4 to 8 letters and encoded into two 32-bit words. In this particular case we deal with 2 events: * "IJMP____" which counts all jump & branch instructions * "IJMPC___" which counts only conditional jumps & branches Those strings are split in two 32-bit words this way "IJMP" + "____" & "IJMP" + "C___" correspondingly. Now if we read them swapped due to CPU core being big-endian then we read "PMJI" + "____" & "PMJI" + "___C". And since we interpret read array of ASCII letters as a null-terminated string on big-endian CPU we end up with 2 events of the same name "PMJI". 
Signed-off-by: Alexey Brodkin Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta --- arch/arc/kernel/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 861a8aea51f9..661fd842ea97 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -614,8 +614,8 @@ static int arc_pmu_device_probe(struct platform_device *pdev) /* loop thru all available h/w condition indexes */ for (i = 0; i < cc_bcr.c; i++) { write_aux_reg(ARC_REG_CC_INDEX, i); - cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0); - cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1); + cc_name.indiv.word0 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME0)); + cc_name.indiv.word1 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME1)); arc_pmu_map_hw_event(i, cc_name.str); arc_pmu_add_raw_event_attr(i, cc_name.str); -- cgit v1.2.3 From a9018adfde809d44e71189b984fa61cc89682b5e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 11 Oct 2019 16:34:19 +0300 Subject: RDMA/uverbs: Prevent potential underflow The issue is in drivers/infiniband/core/uverbs_std_types_cq.c in the UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE) function. We check that: if (attr.comp_vector >= attrs->ufile->device->num_comp_vectors) { But we don't check if "attr.comp_vector" is negative. It could potentially lead to an array underflow. My concern would be where cq->vector is used in the create_cq() function from the cxgb4 driver. And really "attr.comp_vector" appears as a u32 to user space so that's the right type to use.
Fixes: 9ee79fce3642 ("IB/core: Add completion queue (cq) object actions") Link: https://lore.kernel.org/r/20191011133419.GA22905@mwanda Signed-off-by: Dan Carpenter Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs.h | 2 +- include/rdma/ib_verbs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 1e5aeb39f774..63f7f7db5902 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -98,7 +98,7 @@ ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata, struct ib_uverbs_device { atomic_t refcount; - int num_comp_vectors; + u32 num_comp_vectors; struct completion comp; struct device dev; /* First group for device attributes, NULL terminated array */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6a47ba85c54c..e7e733add99f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -366,7 +366,7 @@ struct ib_tm_caps { struct ib_cq_init_attr { unsigned int cqe; - int comp_vector; + u32 comp_vector; u32 flags; }; -- cgit v1.2.3 From cd7455f1013ef96d5cbf5c05d2b7c06f273810a6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 22 Oct 2019 15:57:23 +0200 Subject: bpf: Fix use after free in subprog's jited symbol removal syzkaller managed to trigger the following crash: [...] 
BUG: unable to handle page fault for address: ffffc90001923030 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD aa551067 P4D aa551067 PUD aa552067 PMD a572b067 PTE 80000000a1173163 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 7982 Comm: syz-executor912 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:bpf_jit_binary_hdr include/linux/filter.h:787 [inline] RIP: 0010:bpf_get_prog_addr_region kernel/bpf/core.c:531 [inline] RIP: 0010:bpf_tree_comp kernel/bpf/core.c:600 [inline] RIP: 0010:__lt_find include/linux/rbtree_latch.h:115 [inline] RIP: 0010:latch_tree_find include/linux/rbtree_latch.h:208 [inline] RIP: 0010:bpf_prog_kallsyms_find kernel/bpf/core.c:674 [inline] RIP: 0010:is_bpf_text_address+0x184/0x3b0 kernel/bpf/core.c:709 [...] Call Trace: kernel_text_address kernel/extable.c:147 [inline] __kernel_text_address+0x9a/0x110 kernel/extable.c:102 unwind_get_return_address+0x4c/0x90 arch/x86/kernel/unwind_frame.c:19 arch_stack_walk+0x98/0xe0 arch/x86/kernel/stacktrace.c:26 stack_trace_save+0xb6/0x150 kernel/stacktrace.c:123 save_stack mm/kasan/common.c:69 [inline] set_track mm/kasan/common.c:77 [inline] __kasan_kmalloc+0x11c/0x1b0 mm/kasan/common.c:510 kasan_slab_alloc+0xf/0x20 mm/kasan/common.c:518 slab_post_alloc_hook mm/slab.h:584 [inline] slab_alloc mm/slab.c:3319 [inline] kmem_cache_alloc+0x1f5/0x2e0 mm/slab.c:3483 getname_flags+0xba/0x640 fs/namei.c:138 getname+0x19/0x20 fs/namei.c:209 do_sys_open+0x261/0x560 fs/open.c:1091 __do_sys_open fs/open.c:1115 [inline] __se_sys_open fs/open.c:1110 [inline] __x64_sys_open+0x87/0x90 fs/open.c:1110 do_syscall_64+0xf7/0x1c0 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe [...] 
After further debugging it turns out that we walk kallsyms while in parallel we tear down a BPF program which contains subprograms that have been JITed though the program itself has not been fully exposed and is eventually bailing out with error. The bpf_prog_kallsyms_del_subprogs() in bpf_prog_load()'s error path removes the symbols, however, bpf_prog_free() tears down the JIT memory too early via scheduled work. Instead, it needs to properly respect RCU grace period as the kallsyms walk for BPF is under RCU. Fix it by refactoring __bpf_prog_put()'s tear down and reuse it in our error path where we defer final destruction when we have subprogs in the program. Fixes: 7d1982b4e335 ("bpf: fix panic in prog load calls cleanup") Fixes: 1c2a088a6626 ("bpf: x64: add JIT support for multi-function programs") Reported-by: syzbot+710043c5d1d5b5013bc7@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Tested-by: syzbot+710043c5d1d5b5013bc7@syzkaller.appspotmail.com Link: https://lore.kernel.org/bpf/55f6367324c2d7e9583fa9ccf5385dcbba0d7a6e.1571752452.git.daniel@iogearbox.net --- include/linux/filter.h | 1 - kernel/bpf/core.c | 2 +- kernel/bpf/syscall.c | 31 ++++++++++++++++++++----------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 2ce57645f3cd..0367a75f873b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1099,7 +1099,6 @@ static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) #endif /* CONFIG_BPF_JIT */ -void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp); void bpf_prog_kallsyms_del_all(struct bpf_prog *fp); #define BPF_ANC BIT(15) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 66088a9e9b9e..ef0e1e3e66f4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -502,7 +502,7 @@ int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt) return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, 
false)); } -void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp) +static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp) { int i; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 82eabd4e38ad..bcfc362de4f2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1332,18 +1332,26 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) bpf_prog_free(aux->prog); } +static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) +{ + bpf_prog_kallsyms_del_all(prog); + btf_put(prog->aux->btf); + kvfree(prog->aux->func_info); + bpf_prog_free_linfo(prog); + + if (deferred) + call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); + else + __bpf_prog_put_rcu(&prog->aux->rcu); +} + static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) { if (atomic_dec_and_test(&prog->aux->refcnt)) { perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); /* bpf_prog_free_id() must be called first */ bpf_prog_free_id(prog, do_idr_lock); - bpf_prog_kallsyms_del_all(prog); - btf_put(prog->aux->btf); - kvfree(prog->aux->func_info); - bpf_prog_free_linfo(prog); - - call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); + __bpf_prog_put_noref(prog, true); } } @@ -1741,11 +1749,12 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) return err; free_used_maps: - bpf_prog_free_linfo(prog); - kvfree(prog->aux->func_info); - btf_put(prog->aux->btf); - bpf_prog_kallsyms_del_subprogs(prog); - free_used_maps(prog->aux); + /* In case we have subprogs, we need to wait for a grace + * period before we can tear down JIT memory since symbols + * are already exposed under kallsyms. 
+ */ + __bpf_prog_put_noref(prog, prog->aux->func_cnt); + return err; free_prog: bpf_prog_uncharge_memlock(prog); free_prog_sec: -- cgit v1.2.3 From 011c7289de619d76f82b0d9e9fec3f59d2fe57f7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Sep 2019 21:55:11 +0200 Subject: dynamic_debug: provide dynamic_hex_dump stub The ionic driver started using dynamic_hex_dump(), but that is not always defined: drivers/net/ethernet/pensando/ionic/ionic_main.c:229:2: error: implicit declaration of function 'dynamic_hex_dump' [-Werror,-Wimplicit-function-declaration] Add a dummy implementation to use when CONFIG_DYNAMIC_DEBUG is disabled, printing nothing. Fixes: 938962d55229 ("ionic: Add adminq action") Signed-off-by: Arnd Bergmann Acked-by: Shannon Nelson Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 2 ++ drivers/net/ethernet/pensando/ionic/ionic_main.c | 2 ++ include/linux/dynamic_debug.h | 6 ++++++ 3 files changed, 10 insertions(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 72107a0627a9..20faa8d24c9f 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ +#include +#include #include #include #include diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 15e432386b35..aab311413412 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ +#include +#include #include #include #include diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 6c809440f319..4cf02ecd67de 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@
-204,6 +204,12 @@ static inline int ddebug_dyndbg_module_param_cb(char *param, char *val, do { if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0) #define dynamic_dev_dbg(dev, fmt, ...) \ do { if (0) dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); } while (0) +#define dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ + groupsize, buf, len, ascii) \ + do { if (0) \ + print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, \ + rowsize, groupsize, buf, len, ascii); \ + } while (0) #endif #endif -- cgit v1.2.3 From 34c15202896d11e3974788daf9005a84ec45f7a2 Mon Sep 17 00:00:00 2001 From: yuqi jin Date: Mon, 21 Oct 2019 11:27:34 +0800 Subject: net: stmmac: Fix the problem of tso_xmit When the address width of DMA is greater than 32, the packet header occupies a BD descriptor. The starting address of the data should be added to the header length. Fixes: a993db88d17d ("net: stmmac: Enable support for > 32 Bits addressing in XGMAC") Cc: Eric Dumazet Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Cc: Jose Abreu Cc: "David S. Miller" Cc: Maxime Coquelin Signed-off-by: yuqi jin Signed-off-by: Shaokun Zhang Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3dfd04e0506a..4e9c848c67cc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2995,6 +2995,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) } else { stmmac_set_desc_addr(priv, first, des); tmp_pay_len = pay_len; + des += proto_hdr_len; } stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue); -- cgit v1.2.3 From c329230ce886f449a6e559b636096b75ab00d18a Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 21 Oct 2019 01:34:25 -0400 Subject: bnxt_en: Fix the size of devlink MSIX parameters. 
The current code that rounds up the NVRAM parameter bit size to the next byte size for the devlink parameter is not always correct. The MSIX devlink parameters are 4 bytes and we don't get the correct size using this method. Fix it by adding a new dl_num_bytes member to the bnxt_dl_nvm_param structure which statically provides bytesize information according to the devlink parameter type definition. Fixes: 782a624d00fa ("bnxt_en: Add bnxt_en initial port params table and register it") Cc: Jiri Pirko Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 28 +++++++++++------------ drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h | 3 ++- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index e664392dccc0..68f74f52fe88 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -215,15 +215,15 @@ enum bnxt_dl_param_id { static const struct bnxt_dl_nvm_param nvm_params[] = { {DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV, NVM_OFF_ENABLE_SRIOV, - BNXT_NVM_SHARED_CFG, 1}, + BNXT_NVM_SHARED_CFG, 1, 1}, {DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI, NVM_OFF_IGNORE_ARI, - BNXT_NVM_SHARED_CFG, 1}, + BNXT_NVM_SHARED_CFG, 1, 1}, {DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, - NVM_OFF_MSIX_VEC_PER_PF_MAX, BNXT_NVM_SHARED_CFG, 10}, + NVM_OFF_MSIX_VEC_PER_PF_MAX, BNXT_NVM_SHARED_CFG, 10, 4}, {DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, - NVM_OFF_MSIX_VEC_PER_PF_MIN, BNXT_NVM_SHARED_CFG, 7}, + NVM_OFF_MSIX_VEC_PER_PF_MIN, BNXT_NVM_SHARED_CFG, 7, 4}, {BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK, NVM_OFF_DIS_GRE_VER_CHECK, - BNXT_NVM_SHARED_CFG, 1}, + BNXT_NVM_SHARED_CFG, 1, 1}, }; static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, @@ -232,8 +232,8 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void 
*msg, struct hwrm_nvm_get_variable_input *req = msg; void *data_addr = NULL, *buf = NULL; struct bnxt_dl_nvm_param nvm_param; - int bytesize, idx = 0, rc, i; dma_addr_t data_dma_addr; + int idx = 0, rc, i; /* Get/Set NVM CFG parameter is supported only on PFs */ if (BNXT_VF(bp)) @@ -254,10 +254,9 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, else if (nvm_param.dir_type == BNXT_NVM_FUNC_CFG) idx = bp->pf.fw_fid - BNXT_FIRST_PF_FID; - bytesize = roundup(nvm_param.num_bits, BITS_PER_BYTE) / BITS_PER_BYTE; - switch (bytesize) { + switch (nvm_param.dl_num_bytes) { case 1: - if (nvm_param.num_bits == 1) + if (nvm_param.nvm_num_bits == 1) buf = &val->vbool; else buf = &val->vu8; @@ -272,29 +271,30 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, return -EFAULT; } - data_addr = dma_alloc_coherent(&bp->pdev->dev, bytesize, + data_addr = dma_alloc_coherent(&bp->pdev->dev, nvm_param.dl_num_bytes, &data_dma_addr, GFP_KERNEL); if (!data_addr) return -ENOMEM; req->dest_data_addr = cpu_to_le64(data_dma_addr); - req->data_len = cpu_to_le16(nvm_param.num_bits); + req->data_len = cpu_to_le16(nvm_param.nvm_num_bits); req->option_num = cpu_to_le16(nvm_param.offset); req->index_0 = cpu_to_le16(idx); if (idx) req->dimensions = cpu_to_le16(1); if (req->req_type == cpu_to_le16(HWRM_NVM_SET_VARIABLE)) { - memcpy(data_addr, buf, bytesize); + memcpy(data_addr, buf, nvm_param.dl_num_bytes); rc = hwrm_send_message(bp, msg, msg_len, HWRM_CMD_TIMEOUT); } else { rc = hwrm_send_message_silent(bp, msg, msg_len, HWRM_CMD_TIMEOUT); } if (!rc && req->req_type == cpu_to_le16(HWRM_NVM_GET_VARIABLE)) - memcpy(buf, data_addr, bytesize); + memcpy(buf, data_addr, nvm_param.dl_num_bytes); - dma_free_coherent(&bp->pdev->dev, bytesize, data_addr, data_dma_addr); + dma_free_coherent(&bp->pdev->dev, nvm_param.dl_num_bytes, data_addr, + data_dma_addr); if (rc == -EACCES) netdev_err(bp->dev, "PF does not have admin privileges to modify NVM config\n"); return rc; 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h index b97e0baeb42d..2f4fd0a7d04b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h @@ -52,7 +52,8 @@ struct bnxt_dl_nvm_param { u16 id; u16 offset; u16 dir_type; - u16 num_bits; + u16 nvm_num_bits; + u8 dl_num_bytes; }; void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event); -- cgit v1.2.3 From 83a46a82b96c1928ad82958752523fb0c7d9fcce Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 21 Oct 2019 01:34:26 -0400 Subject: bnxt_en: Fix devlink NVRAM related byte order related issues. The current code does not do endian swapping between the devlink parameter and the internal NVRAM representation. Define a union to represent the little endian NVRAM data and add 2 helper functions to copy to and from the NVRAM data with the proper byte swapping. Fixes: 782a624d00fa ("bnxt_en: Add bnxt_en initial port params table and register it") Cc: Jiri Pirko Reviewed-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 81 +++++++++++++++-------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 68f74f52fe88..bd4b9f31614e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -226,12 +226,55 @@ static const struct bnxt_dl_nvm_param nvm_params[] = { BNXT_NVM_SHARED_CFG, 1, 1}, }; +union bnxt_nvm_data { + u8 val8; + __le32 val32; +}; + +static void bnxt_copy_to_nvm_data(union bnxt_nvm_data *dst, + union devlink_param_value *src, + int nvm_num_bits, int dl_num_bytes) +{ + u32 val32 = 0; + + if (nvm_num_bits == 1) { + dst->val8 = src->vbool; + return; + } + if (dl_num_bytes == 4) + val32 = src->vu32; + else if (dl_num_bytes 
== 2) + val32 = (u32)src->vu16; + else if (dl_num_bytes == 1) + val32 = (u32)src->vu8; + dst->val32 = cpu_to_le32(val32); +} + +static void bnxt_copy_from_nvm_data(union devlink_param_value *dst, + union bnxt_nvm_data *src, + int nvm_num_bits, int dl_num_bytes) +{ + u32 val32; + + if (nvm_num_bits == 1) { + dst->vbool = src->val8; + return; + } + val32 = le32_to_cpu(src->val32); + if (dl_num_bytes == 4) + dst->vu32 = val32; + else if (dl_num_bytes == 2) + dst->vu16 = (u16)val32; + else if (dl_num_bytes == 1) + dst->vu8 = (u8)val32; +} + static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, int msg_len, union devlink_param_value *val) { struct hwrm_nvm_get_variable_input *req = msg; - void *data_addr = NULL, *buf = NULL; struct bnxt_dl_nvm_param nvm_param; + union bnxt_nvm_data *data; dma_addr_t data_dma_addr; int idx = 0, rc, i; @@ -254,26 +297,9 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, else if (nvm_param.dir_type == BNXT_NVM_FUNC_CFG) idx = bp->pf.fw_fid - BNXT_FIRST_PF_FID; - switch (nvm_param.dl_num_bytes) { - case 1: - if (nvm_param.nvm_num_bits == 1) - buf = &val->vbool; - else - buf = &val->vu8; - break; - case 2: - buf = &val->vu16; - break; - case 4: - buf = &val->vu32; - break; - default: - return -EFAULT; - } - - data_addr = dma_alloc_coherent(&bp->pdev->dev, nvm_param.dl_num_bytes, - &data_dma_addr, GFP_KERNEL); - if (!data_addr) + data = dma_alloc_coherent(&bp->pdev->dev, sizeof(*data), + &data_dma_addr, GFP_KERNEL); + if (!data) return -ENOMEM; req->dest_data_addr = cpu_to_le64(data_dma_addr); @@ -284,17 +310,18 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, req->dimensions = cpu_to_le16(1); if (req->req_type == cpu_to_le16(HWRM_NVM_SET_VARIABLE)) { - memcpy(data_addr, buf, nvm_param.dl_num_bytes); + bnxt_copy_to_nvm_data(data, val, nvm_param.nvm_num_bits, + nvm_param.dl_num_bytes); rc = hwrm_send_message(bp, msg, msg_len, HWRM_CMD_TIMEOUT); } else { rc = 
hwrm_send_message_silent(bp, msg, msg_len, HWRM_CMD_TIMEOUT); + if (!rc) + bnxt_copy_from_nvm_data(val, data, + nvm_param.nvm_num_bits, + nvm_param.dl_num_bytes); } - if (!rc && req->req_type == cpu_to_le16(HWRM_NVM_GET_VARIABLE)) - memcpy(buf, data_addr, nvm_param.dl_num_bytes); - - dma_free_coherent(&bp->pdev->dev, nvm_param.dl_num_bytes, data_addr, - data_dma_addr); + dma_free_coherent(&bp->pdev->dev, sizeof(*data), data, data_dma_addr); if (rc == -EACCES) netdev_err(bp->dev, "PF does not have admin privileges to modify NVM config\n"); return rc; -- cgit v1.2.3 From c6a9e7aa2e8b15402022a15625284069d4fd6df0 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 21 Oct 2019 01:34:27 -0400 Subject: bnxt_en: Adjust the time to wait before polling firmware readiness. When firmware indicates that driver needs to invoke firmware reset which is common for both error recovery and live firmware reset path, driver needs a different time to wait before polling for firmware readiness. Modify the wait time to fw_reset_min_dsecs, which is initialised to correct timeout for error recovery and firmware reset. 
Fixes: 4037eb715680 ("bnxt_en: Add a new BNXT_FW_RESET_STATE_POLL_FW_DOWN state.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b4a8cf620a0c..84926184bed2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10669,14 +10669,11 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->fw_reset_state = BNXT_FW_RESET_STATE_RESET_FW; } /* fall through */ - case BNXT_FW_RESET_STATE_RESET_FW: { - u32 wait_dsecs = bp->fw_health->post_reset_wait_dsecs; - + case BNXT_FW_RESET_STATE_RESET_FW: bnxt_reset_all(bp); bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV; - bnxt_queue_fw_reset_work(bp, wait_dsecs * HZ / 10); + bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10); return; - } case BNXT_FW_RESET_STATE_ENABLE_DEV: if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) && bp->fw_health) { -- cgit v1.2.3 From f255ed1c4e4c5ed8171b6e81dce1297df1f1b60c Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 21 Oct 2019 01:34:28 -0400 Subject: bnxt_en: Minor formatting changes in FW devlink_health_reporter Minor formatting changes to diagnose cb for FW devlink health reporter. 
Suggested-by: Jiri Pirko Cc: Jiri Pirko Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index bd4b9f31614e..7151244f8c7d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -29,25 +29,20 @@ static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter, val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); health_status = val & 0xffff; - if (health_status == BNXT_FW_STATUS_HEALTHY) { - rc = devlink_fmsg_string_pair_put(fmsg, "FW status", - "Healthy;"); - if (rc) - return rc; - } else if (health_status < BNXT_FW_STATUS_HEALTHY) { - rc = devlink_fmsg_string_pair_put(fmsg, "FW status", - "Not yet completed initialization;"); + if (health_status < BNXT_FW_STATUS_HEALTHY) { + rc = devlink_fmsg_string_pair_put(fmsg, "Description", + "Not yet completed initialization"); if (rc) return rc; } else if (health_status > BNXT_FW_STATUS_HEALTHY) { - rc = devlink_fmsg_string_pair_put(fmsg, "FW status", - "Encountered fatal error and cannot recover;"); + rc = devlink_fmsg_string_pair_put(fmsg, "Description", + "Encountered fatal error and cannot recover"); if (rc) return rc; } if (val >> 16) { - rc = devlink_fmsg_u32_pair_put(fmsg, "Error", val >> 16); + rc = devlink_fmsg_u32_pair_put(fmsg, "Error code", val >> 16); if (rc) return rc; } -- cgit v1.2.3 From f6824308c4be25ba024ab942a6135aa0356acaea Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 21 Oct 2019 01:34:29 -0400 Subject: bnxt_en: Avoid disabling pci device in bnxt_remove_one() for already disabled device. With the recently added error recovery logic, the device may already be disabled if the firmware recovery is unsuccessful. 
In bnxt_remove_one(), check that the device is still enabled first before calling pci_disable_device(). Fixes: 3bc7d4a352ef ("bnxt_en: Add BNXT_STATE_IN_FW_RESET state.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 84926184bed2..04ec909e06df 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10382,7 +10382,8 @@ static void bnxt_cleanup_pci(struct bnxt *bp) { bnxt_unmap_bars(bp, bp->pdev); pci_release_regions(bp->pdev); - pci_disable_device(bp->pdev); + if (pci_is_enabled(bp->pdev)) + pci_disable_device(bp->pdev); } static void bnxt_init_dflt_coal(struct bnxt *bp) -- cgit v1.2.3 From d665c1281bc89ac85b8b0c058c22a3f94640a1d6 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Tue, 22 Oct 2019 07:57:42 +0800 Subject: net: sched: taprio: fix -Wmissing-prototypes warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We get one warning when building the kernel with W=1: net/sched/sch_taprio.c:1155:6: warning: no previous prototype for ‘taprio_offload_config_changed’ [-Wmissing-prototypes] Make the function static to fix this. Fixes: 9c66d1564676 ("taprio: Add support for hardware offloading") Signed-off-by: Yi Wang Acked-by: Vinicius Costa Gomes Signed-off-by: Jakub Kicinski --- net/sched/sch_taprio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 6719a65169d4..2121187229cd 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1152,7 +1152,7 @@ EXPORT_SYMBOL_GPL(taprio_offload_free); * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump(). * This is left as TODO.
*/ -void taprio_offload_config_changed(struct taprio_sched *q) +static void taprio_offload_config_changed(struct taprio_sched *q) { struct sched_gate_list *oper, *admin; -- cgit v1.2.3 From b5b9181c2403025b2c7ae7ea44333fd8fe6dbb54 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 21 Oct 2019 19:02:43 -0600 Subject: selftests: Make l2tp.sh executable Kernel test robot reported that the l2tp.sh test script failed: # selftests: net: l2tp.sh # Warning: file l2tp.sh is not executable, correct this. Set executable bits. Fixes: e858ef1cd4bc ("selftests: Add l2tp tests") Reported-by: kernel test robot Signed-off-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/l2tp.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tools/testing/selftests/net/l2tp.sh diff --git a/tools/testing/selftests/net/l2tp.sh b/tools/testing/selftests/net/l2tp.sh old mode 100644 new mode 100755 -- cgit v1.2.3 From 9464cc37f3671ee69cb1c00662b5e1f113a96b23 Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Mon, 21 Oct 2019 12:01:57 +0200 Subject: net: openvswitch: free vport unless register_netdevice() succeeds syzbot found the following crash on: HEAD commit: 1e78030e Merge tag 'mmc-v5.3-rc1' of git://git.kernel.org/.. git tree: upstream console output: https://syzkaller.appspot.com/x/log.txt?x=148d3d1a600000 kernel config: https://syzkaller.appspot.com/x/.config?x=30cef20daf3e9977 dashboard link: https://syzkaller.appspot.com/bug?extid=13210896153522fe1ee5 compiler: gcc (GCC) 9.0.0 20181231 (experimental) syz repro: https://syzkaller.appspot.com/x/repro.syz?x=136aa8c4600000 C reproducer: https://syzkaller.appspot.com/x/repro.c?x=109ba792600000 ===================================================================== BUG: memory leak unreferenced object 0xffff8881207e4100 (size 128): comm "syz-executor032", pid 7014, jiffies 4294944027 (age 13.830s) hex dump (first 32 bytes): 00 70 16 18 81 88 ff ff 80 af 8c 22 81 88 ff ff .p.........".... 
00 b6 23 17 81 88 ff ff 00 00 00 00 00 00 00 00 ..#............. backtrace: [<000000000eb78212>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<000000000eb78212>] slab_post_alloc_hook mm/slab.h:522 [inline] [<000000000eb78212>] slab_alloc mm/slab.c:3319 [inline] [<000000000eb78212>] kmem_cache_alloc_trace+0x145/0x2c0 mm/slab.c:3548 [<00000000006ea6c6>] kmalloc include/linux/slab.h:552 [inline] [<00000000006ea6c6>] kzalloc include/linux/slab.h:748 [inline] [<00000000006ea6c6>] ovs_vport_alloc+0x37/0xf0 net/openvswitch/vport.c:130 [<00000000f9a04a7d>] internal_dev_create+0x24/0x1d0 net/openvswitch/vport-internal_dev.c:164 [<0000000056ee7c13>] ovs_vport_add+0x81/0x190 net/openvswitch/vport.c:199 [<000000005434efc7>] new_vport+0x19/0x80 net/openvswitch/datapath.c:194 [<00000000b7b253f1>] ovs_dp_cmd_new+0x22f/0x410 net/openvswitch/datapath.c:1614 [<00000000e0988518>] genl_family_rcv_msg+0x2ab/0x5b0 net/netlink/genetlink.c:629 [<00000000d0cc9347>] genl_rcv_msg+0x54/0x9c net/netlink/genetlink.c:654 [<000000006694b647>] netlink_rcv_skb+0x61/0x170 net/netlink/af_netlink.c:2477 [<0000000088381f37>] genl_rcv+0x29/0x40 net/netlink/genetlink.c:665 [<00000000dad42a47>] netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] [<00000000dad42a47>] netlink_unicast+0x1ec/0x2d0 net/netlink/af_netlink.c:1328 [<0000000067e6b079>] netlink_sendmsg+0x270/0x480 net/netlink/af_netlink.c:1917 [<00000000aab08a47>] sock_sendmsg_nosec net/socket.c:637 [inline] [<00000000aab08a47>] sock_sendmsg+0x54/0x70 net/socket.c:657 [<000000004cb7c11d>] ___sys_sendmsg+0x393/0x3c0 net/socket.c:2311 [<00000000c4901c63>] __sys_sendmsg+0x80/0xf0 net/socket.c:2356 [<00000000c10abb2d>] __do_sys_sendmsg net/socket.c:2365 [inline] [<00000000c10abb2d>] __se_sys_sendmsg net/socket.c:2363 [inline] [<00000000c10abb2d>] __x64_sys_sendmsg+0x23/0x30 net/socket.c:2363 BUG: memory leak unreferenced object 0xffff88811723b600 (size 64): comm "syz-executor032", pid 7014, jiffies 4294944027 (age 13.830s) 
hex dump (first 32 bytes): 01 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 02 00 00 00 05 35 82 c1 .............5.. backtrace: [<00000000352f46d8>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<00000000352f46d8>] slab_post_alloc_hook mm/slab.h:522 [inline] [<00000000352f46d8>] slab_alloc mm/slab.c:3319 [inline] [<00000000352f46d8>] __do_kmalloc mm/slab.c:3653 [inline] [<00000000352f46d8>] __kmalloc+0x169/0x300 mm/slab.c:3664 [<000000008e48f3d1>] kmalloc include/linux/slab.h:557 [inline] [<000000008e48f3d1>] ovs_vport_set_upcall_portids+0x54/0xd0 net/openvswitch/vport.c:343 [<00000000541e4f4a>] ovs_vport_alloc+0x7f/0xf0 net/openvswitch/vport.c:139 [<00000000f9a04a7d>] internal_dev_create+0x24/0x1d0 net/openvswitch/vport-internal_dev.c:164 [<0000000056ee7c13>] ovs_vport_add+0x81/0x190 net/openvswitch/vport.c:199 [<000000005434efc7>] new_vport+0x19/0x80 net/openvswitch/datapath.c:194 [<00000000b7b253f1>] ovs_dp_cmd_new+0x22f/0x410 net/openvswitch/datapath.c:1614 [<00000000e0988518>] genl_family_rcv_msg+0x2ab/0x5b0 net/netlink/genetlink.c:629 [<00000000d0cc9347>] genl_rcv_msg+0x54/0x9c net/netlink/genetlink.c:654 [<000000006694b647>] netlink_rcv_skb+0x61/0x170 net/netlink/af_netlink.c:2477 [<0000000088381f37>] genl_rcv+0x29/0x40 net/netlink/genetlink.c:665 [<00000000dad42a47>] netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] [<00000000dad42a47>] netlink_unicast+0x1ec/0x2d0 net/netlink/af_netlink.c:1328 [<0000000067e6b079>] netlink_sendmsg+0x270/0x480 net/netlink/af_netlink.c:1917 [<00000000aab08a47>] sock_sendmsg_nosec net/socket.c:637 [inline] [<00000000aab08a47>] sock_sendmsg+0x54/0x70 net/socket.c:657 [<000000004cb7c11d>] ___sys_sendmsg+0x393/0x3c0 net/socket.c:2311 [<00000000c4901c63>] __sys_sendmsg+0x80/0xf0 net/socket.c:2356 BUG: memory leak unreferenced object 0xffff8881228ca500 (size 128): comm "syz-executor032", pid 7015, jiffies 4294944622 (age 7.880s) hex dump (first 32 bytes): 00 f0 27 
18 81 88 ff ff 80 ac 8c 22 81 88 ff ff ..'........".... 40 b7 23 17 81 88 ff ff 00 00 00 00 00 00 00 00 @.#............. backtrace: [<000000000eb78212>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<000000000eb78212>] slab_post_alloc_hook mm/slab.h:522 [inline] [<000000000eb78212>] slab_alloc mm/slab.c:3319 [inline] [<000000000eb78212>] kmem_cache_alloc_trace+0x145/0x2c0 mm/slab.c:3548 [<00000000006ea6c6>] kmalloc include/linux/slab.h:552 [inline] [<00000000006ea6c6>] kzalloc include/linux/slab.h:748 [inline] [<00000000006ea6c6>] ovs_vport_alloc+0x37/0xf0 net/openvswitch/vport.c:130 [<00000000f9a04a7d>] internal_dev_create+0x24/0x1d0 net/openvswitch/vport-internal_dev.c:164 [<0000000056ee7c13>] ovs_vport_add+0x81/0x190 net/openvswitch/vport.c:199 [<000000005434efc7>] new_vport+0x19/0x80 net/openvswitch/datapath.c:194 [<00000000b7b253f1>] ovs_dp_cmd_new+0x22f/0x410 net/openvswitch/datapath.c:1614 [<00000000e0988518>] genl_family_rcv_msg+0x2ab/0x5b0 net/netlink/genetlink.c:629 [<00000000d0cc9347>] genl_rcv_msg+0x54/0x9c net/netlink/genetlink.c:654 [<000000006694b647>] netlink_rcv_skb+0x61/0x170 net/netlink/af_netlink.c:2477 [<0000000088381f37>] genl_rcv+0x29/0x40 net/netlink/genetlink.c:665 [<00000000dad42a47>] netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] [<00000000dad42a47>] netlink_unicast+0x1ec/0x2d0 net/netlink/af_netlink.c:1328 [<0000000067e6b079>] netlink_sendmsg+0x270/0x480 net/netlink/af_netlink.c:1917 [<00000000aab08a47>] sock_sendmsg_nosec net/socket.c:637 [inline] [<00000000aab08a47>] sock_sendmsg+0x54/0x70 net/socket.c:657 [<000000004cb7c11d>] ___sys_sendmsg+0x393/0x3c0 net/socket.c:2311 [<00000000c4901c63>] __sys_sendmsg+0x80/0xf0 net/socket.c:2356 [<00000000c10abb2d>] __do_sys_sendmsg net/socket.c:2365 [inline] [<00000000c10abb2d>] __se_sys_sendmsg net/socket.c:2363 [inline] [<00000000c10abb2d>] __x64_sys_sendmsg+0x23/0x30 net/socket.c:2363 ===================================================================== The 
function in net core, register_netdevice(), may fail with vport's destruction callback either invoked or not. After commit 309b66970ee2 ("net: openvswitch: do not free vport if register_netdevice() is failed."), the duty to destroy vport is offloaded from the driver OTOH, which ends up in the memory leak reported. It is fixed by releasing vport unless device is registered successfully. To do that, the callback assignment is deferred until device is registered. Reported-by: syzbot+13210896153522fe1ee5@syzkaller.appspotmail.com Fixes: 309b66970ee2 ("net: openvswitch: do not free vport if register_netdevice() is failed.") Cc: Taehee Yoo Cc: Greg Rose Cc: Eric Dumazet Cc: Marcelo Ricardo Leitner Cc: Ying Xue Cc: Andrey Konovalov Signed-off-by: Hillf Danton Acked-by: Pravin B Shelar [sbrivio: this was sent to dev@openvswitch.org and never made its way to netdev -- resending original patch] Signed-off-by: Stefano Brivio Reviewed-by: Greg Rose Signed-off-by: Jakub Kicinski --- net/openvswitch/vport-internal_dev.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 21c90d3a7ebf..58a7b8312c28 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -137,7 +137,7 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH | IFF_NO_QUEUE; netdev->needs_free_netdev = true; - netdev->priv_destructor = internal_dev_destructor; + netdev->priv_destructor = NULL; netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->rtnl_link_ops = &internal_dev_link_ops; @@ -159,7 +159,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) struct internal_dev *internal_dev; struct net_device *dev; int err; - bool free_vport = true; vport = ovs_vport_alloc(0, &ovs_internal_vport_ops, parms); if (IS_ERR(vport)) { @@ -190,10 +189,9 @@ static struct vport
*internal_dev_create(const struct vport_parms *parms) rtnl_lock(); err = register_netdevice(vport->dev); - if (err) { - free_vport = false; + if (err) goto error_unlock; - } + vport->dev->priv_destructor = internal_dev_destructor; dev_set_promiscuity(vport->dev, 1); rtnl_unlock(); @@ -207,8 +205,7 @@ error_unlock: error_free_netdev: free_netdev(dev); error_free_vport: - if (free_vport) - ovs_vport_free(vport); + ovs_vport_free(vport); error: return ERR_PTR(err); } -- cgit v1.2.3 From 6c5d9c2a6bedbb3c3c14253776320c0ee564f064 Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Tue, 22 Oct 2019 15:44:40 +0100 Subject: ipv6: include <net/addrconf.h> for missing declarations Include <net/addrconf.h> for the missing declarations of various functions. Fixes the following sparse warnings: net/ipv6/addrconf_core.c:94:5: warning: symbol 'register_inet6addr_notifier' was not declared. Should it be static? net/ipv6/addrconf_core.c:100:5: warning: symbol 'unregister_inet6addr_notifier' was not declared. Should it be static? net/ipv6/addrconf_core.c:106:5: warning: symbol 'inet6addr_notifier_call_chain' was not declared. Should it be static? net/ipv6/addrconf_core.c:112:5: warning: symbol 'register_inet6addr_validator_notifier' was not declared. Should it be static? net/ipv6/addrconf_core.c:118:5: warning: symbol 'unregister_inet6addr_validator_notifier' was not declared. Should it be static? net/ipv6/addrconf_core.c:125:5: warning: symbol 'inet6addr_validator_notifier_call_chain' was not declared. Should it be static? net/ipv6/addrconf_core.c:237:6: warning: symbol 'in6_dev_finish_destroy' was not declared. Should it be static?
Signed-off-by: Ben Dooks (Codethink) Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 783f3c1466da..2fc079284ca4 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -7,6 +7,7 @@ #include <linux/export.h> #include <net/ipv6.h> #include <net/ipv6_stubs.h> +#include <net/addrconf.h> #include <net/ip.h> /* if ipv6 module registers this function is used by xfrm to force all -- cgit v1.2.3 From 0fd103ccfe6a06e40e2d9d8c91d96332cc9e1239 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 22 Oct 2019 09:21:12 +0200 Subject: scsi: lpfc: Honor module parameter lpfc_use_adisc The initial lpfc_disc_set_adisc implementation in commit dea3101e0a5c ("lpfc: add Emulex FC driver version 8.0.28") enabled ADISC if cfg_use_adisc && RSCN_MODE && FCP_2_DEVICE In commit 92d7f7b0cde3 ("[SCSI] lpfc: NPIV: add NPIV support on top of SLI-3") this changed to (cfg_use_adisc && RSCN_MODE) || FCP_2_DEVICE and later in commit ffc954936b13 ("[SCSI] lpfc 8.3.13: FC Discovery Fixes and enhancements.") to (cfg_use_adisc && RSCN_MODE) || (FCP_2_DEVICE && FCP_TARGET) A customer reports that after a devloss, an ADISC failure is logged. It turns out the ADISC flag is set even if the user explicitly set lpfc_use_adisc = 0. [Sat Dec 22 22:55:58 2018] lpfc 0000:82:00.0: 2:(0):0203 Devloss timeout on WWPN 50:01:43:80:12:8e:40:20 NPort x05df00 Data: x82000000 x8 xa [Sat Dec 22 23:08:20 2018] lpfc 0000:82:00.0: 2:(0):2755 ADISC failure DID:05DF00 Status:x9/x70000 [mkp: fixed Hannes' email] Fixes: 92d7f7b0cde3 ("[SCSI] lpfc: NPIV: add NPIV support on top of SLI-3") Cc: Dick Kennedy Cc: James Smart Link: https://lore.kernel.org/r/20191022072112.132268-1-dwagner@suse.de Reviewed-by: Hannes Reinecke Reviewed-by: James Smart Signed-off-by: Daniel Wagner Signed-off-by: Martin K.
Petersen --- drivers/scsi/lpfc/lpfc_nportdisc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index f4b879d25fe9..fc6e4546d738 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -851,9 +851,9 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) if (!(vport->fc_flag & FC_PT2PT)) { /* Check config parameter use-adisc or FCP-2 */ - if ((vport->cfg_use_adisc && (vport->fc_flag & FC_RSCN_MODE)) || + if (vport->cfg_use_adisc && ((vport->fc_flag & FC_RSCN_MODE) || ((ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) && - (ndlp->nlp_type & NLP_FCP_TARGET))) { + (ndlp->nlp_type & NLP_FCP_TARGET)))) { spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_ADISC; spin_unlock_irq(shost->host_lock); -- cgit v1.2.3 From c2ff2a36eff60efb5e123c940115216d6bf65684 Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Tue, 22 Oct 2019 12:36:42 -0700 Subject: scsi: qla2xxx: Initialized mailbox to prevent driver load failure This patch fixes issue with Gen7 adapter in a blade environment where one of the ports will not be detected by driver. Firmware expects mailbox 11 to be set or cleared by driver for newer ISP. Following message is seen in the log file: [ 18.810892] qla2xxx [0000:d8:00.0]-1820:1: **** Failed=102 mb[0]=4005 mb[1]=37 mb[2]=20 mb[3]=8 [ 18.819596] cmd=2 **** [mkp: typos] Link: https://lore.kernel.org/r/20191022193643.7076-2-hmadhani@marvell.com Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qla2xxx/qla_mbx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 1cc6913f76c4..4a1f21c11758 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -702,6 +702,7 @@ qla2x00_execute_fw(scsi_qla_host_t *vha, uint32_t risc_addr) mcp->mb[2] = LSW(risc_addr); mcp->mb[3] = 0; mcp->mb[4] = 0; + mcp->mb[11] = 0; ha->flags.using_lr_setting = 0; if (IS_QLA25XX(ha) || IS_QLA81XX(ha) || IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { @@ -746,7 +747,7 @@ qla2x00_execute_fw(scsi_qla_host_t *vha, uint32_t risc_addr) if (ha->flags.exchoffld_enabled) mcp->mb[4] |= ENABLE_EXCHANGE_OFFLD; - mcp->out_mb |= MBX_4|MBX_3|MBX_2|MBX_1; + mcp->out_mb |= MBX_4 | MBX_3 | MBX_2 | MBX_1 | MBX_11; mcp->in_mb |= MBX_3 | MBX_2 | MBX_1; } else { mcp->mb[1] = LSW(risc_addr); -- cgit v1.2.3 From 8d8b83f5be2a3bdac3695a94e6cb5e50bd114869 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 22 Oct 2019 12:36:43 -0700 Subject: scsi: qla2xxx: Fix partial flash write of MBI For new adapters with multiple flash regions to write to, current code allows FW & Boot regions to be written, while other regions are blocked via sysfs. The fix is to block all flash read/write through sysfs interface. Fixes: e81d1bcbde06 ("scsi: qla2xxx: Further limit FLASH region write access from SysFS") Cc: stable@vger.kernel.org # 5.2 Link: https://lore.kernel.org/r/20191022193643.7076-3-hmadhani@marvell.com Signed-off-by: Quinn Tran Signed-off-by: Girish Basrur Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qla2xxx/qla_attr.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 8b3015361428..8705ca6395e4 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -440,9 +440,6 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj, valid = 0; if (ha->optrom_size == OPTROM_SIZE_2300 && start == 0) valid = 1; - else if (start == (ha->flt_region_boot * 4) || - start == (ha->flt_region_fw * 4)) - valid = 1; else if (IS_QLA24XX_TYPE(ha) || IS_QLA25XX(ha)) valid = 1; if (!valid) { @@ -489,8 +486,10 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj, "Writing flash region -- 0x%x/0x%x.\n", ha->optrom_region_start, ha->optrom_region_size); - ha->isp_ops->write_optrom(vha, ha->optrom_buffer, + rval = ha->isp_ops->write_optrom(vha, ha->optrom_buffer, ha->optrom_region_start, ha->optrom_region_size); + if (rval) + rval = -EIO; break; default: rval = -EINVAL; -- cgit v1.2.3 From 3b4d9eb2ee74dd5ea7fa36cffb0ca7f5bc4924da Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 22 Oct 2019 23:30:38 +0200 Subject: bpf: Fix use after free in bpf_get_prog_name There is one more problematic case I noticed while recently fixing BPF kallsyms handling in cd7455f1013e ("bpf: Fix use after free in subprog's jited symbol removal") and that is bpf_get_prog_name(). If BTF has been attached to the prog, then we may be able to fetch the function signature type id in kallsyms through prog->aux->func_info[prog->aux->func_idx].type_id. However, while the BTF object itself is torn down via RCU callback, the prog's aux->func_info is immediately freed via kvfree(prog->aux->func_info) once the prog's refcount either hit zero or when subprograms were already exposed via kallsyms and we hit the error path added in 5482e9a93c83 ("bpf: Fix memleak in aux->func_info and aux->btf"). 
This violates RCU as well since kallsyms could be walked in parallel where we could access aux->func_info. Hence, defer kvfree() to after RCU grace period. Looking at ba64e7d85252 ("bpf: btf: support proper non-jit func info") there is no reason/dependency where we couldn't defer the kvfree(aux->func_info) into the RCU callback. Fixes: 5482e9a93c83 ("bpf: Fix memleak in aux->func_info and aux->btf") Fixes: ba64e7d85252 ("bpf: btf: support proper non-jit func info") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Cc: Martin KaFai Lau Link: https://lore.kernel.org/bpf/875f2906a7c1a0691f2d567b4d8e4ea2739b1e88.1571779205.git.daniel@iogearbox.net --- kernel/bpf/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index bcfc362de4f2..0937719b87e2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1326,6 +1326,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) { struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); + kvfree(aux->func_info); free_used_maps(aux); bpf_prog_uncharge_memlock(aux->prog); security_bpf_prog_free(aux); @@ -1336,7 +1337,6 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) { bpf_prog_kallsyms_del_all(prog); btf_put(prog->aux->btf); - kvfree(prog->aux->func_info); bpf_prog_free_linfo(prog); if (deferred) -- cgit v1.2.3 From 80da5a809d193c60d090cbdf4fe316781bc07965 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Wed, 23 Oct 2019 10:02:49 +0800 Subject: virtiofs: Remove set but not used variable 'fc' Fixes gcc '-Wunused-but-set-variable' warning: fs/fuse/virtio_fs.c: In function virtio_fs_wake_pending_and_unlock: fs/fuse/virtio_fs.c:983:20: warning: variable fc set but not used [-Wunused-but-set-variable] It is not used since commit 7ee1e2e631db ("virtiofs: No need to check fpq->connected state") Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: Miklos Szeredi --- 
fs/fuse/virtio_fs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 2de8fc0d6a24..a5c86048b96e 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -980,7 +980,6 @@ __releases(fiq->lock) { unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ struct virtio_fs *fs; - struct fuse_conn *fc; struct fuse_req *req; struct virtio_fs_vq *fsvq; int ret; @@ -993,7 +992,6 @@ __releases(fiq->lock) spin_unlock(&fiq->lock); fs = fiq->priv; - fc = fs->vqs[queue_id].fud->fc; pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n", __func__, req->in.h.opcode, req->in.h.unique, -- cgit v1.2.3 From 7cded5658329dd26b9a80d4a6de2665bf93e9006 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 18 Oct 2019 10:49:01 +0300 Subject: iwlwifi: pcie: fix merge damage on making QnJ exclusive Two patches were sent out of order: one removed some conditions from an if and the other moved the code elsewhere. When sending the patch that moved the code, an older version of the original code was moved, causing the "make QnJ exclusive" code to be essentially undone. Fix that by removing the inclusive conditions from the check again. 
Fixes: 809805a820c6 ("iwlwifi: pcie: move some cfg mangling from trans_pcie_alloc to probe") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 6f4bb7ce71a5..9d41d783e59f 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1067,11 +1067,7 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(iwl_trans->hw_rf_id) == CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) && - ((cfg != &iwl_ax200_cfg_cc && - cfg != &killer1650x_2ax_cfg && - cfg != &killer1650w_2ax_cfg && - cfg != &iwl_ax201_cfg_quz_hr) || - iwl_trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0)) { + iwl_trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0) { u32 hw_status; hw_status = iwl_read_prph(iwl_trans, UMAG_GEN_HW_STATUS); -- cgit v1.2.3 From 8c55dedb795be8ec0cf488f98c03a1c2176f7fb1 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 18 Oct 2019 07:43:21 -0400 Subject: rtlwifi: Fix potential overflow on P2P code Nicolas Waisman noticed that even though noa_len is checked for a compatible length it's still possible to overrun the buffers of p2pinfo since there's no check on the upper bound of noa_num. Bound noa_num against P2P_MAX_NOA_NUM. 
Reported-by: Nicolas Waisman Signed-off-by: Laura Abbott Acked-by: Ping-Ke Shih Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/ps.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c index 70f04c2f5b17..fff8dda14023 100644 --- a/drivers/net/wireless/realtek/rtlwifi/ps.c +++ b/drivers/net/wireless/realtek/rtlwifi/ps.c @@ -754,6 +754,9 @@ static void rtl_p2p_noa_ie(struct ieee80211_hw *hw, void *data, return; } else { noa_num = (noa_len - 2) / 13; + if (noa_num > P2P_MAX_NOA_NUM) + noa_num = P2P_MAX_NOA_NUM; + } noa_index = ie[3]; if (rtlpriv->psc.p2p_ps_info.p2p_ps_mode == @@ -848,6 +851,9 @@ static void rtl_p2p_action_ie(struct ieee80211_hw *hw, void *data, return; } else { noa_num = (noa_len - 2) / 13; + if (noa_num > P2P_MAX_NOA_NUM) + noa_num = P2P_MAX_NOA_NUM; + } noa_index = ie[3]; if (rtlpriv->psc.p2p_ps_info.p2p_ps_mode == -- cgit v1.2.3 From 6dea7da7019aa04c02edf1878c9c2e59d6cb75a5 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Sat, 19 Oct 2019 13:03:27 +0300 Subject: iwlwifi: pcie: fix PCI ID 0x2720 configs that should be soc Some entries for PCI ID 0x2720 were using iwl9260_2ac_cfg, but the correct is to use iwl9260_2ac_cfg_soc. Fix that. 
Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 9d41d783e59f..b7c3737c5c2f 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -618,9 +618,9 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x271B, 0x0210, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x271B, 0x0214, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x271C, 0x0214, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2720, 0x0034, iwl9560_2ac_160_cfg)}, - {IWL_PCI_DEVICE(0x2720, 0x0038, iwl9560_2ac_160_cfg)}, - {IWL_PCI_DEVICE(0x2720, 0x003C, iwl9560_2ac_160_cfg)}, + {IWL_PCI_DEVICE(0x2720, 0x0034, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x2720, 0x0038, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x2720, 0x003C, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x0060, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x0064, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x00A0, iwl9462_2ac_cfg_soc)}, @@ -640,7 +640,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2720, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x2030, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x2034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x4030, iwl9560_2ac_160_cfg)}, + {IWL_PCI_DEVICE(0x2720, 0x4030, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x4034, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x40A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x4234, iwl9560_2ac_cfg_soc)}, -- cgit v1.2.3 From e55890150a961944e861a46efc8599f80f25de76 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Sat, 19 Oct 2019 13:03:28 +0300 Subject: iwlwifi: pcie: fix all 9460 entries for qnj A bunch of the entries for qnj were wrong. 
The 9460 device doesn't exist, so update them to 9461 and 9462. There are still a bunch of other occurrences of 9460, but that will be fixed separately. Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index b7c3737c5c2f..03568f18a171 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -573,20 +573,20 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2526, 0x0034, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0038, iwl9560_2ac_160_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x003C, iwl9560_2ac_160_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x0060, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x0064, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x00A0, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x00A4, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0060, iwl9461_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x2526, 0x0064, iwl9461_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x2526, 0x00A0, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x2526, 0x00A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x0210, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0214, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0230, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0234, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0238, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x023C, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x0260, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0260, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x0264, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2526, 0x02A0, iwl9460_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x02A4, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x02A0, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x2526, 0x02A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x1010, iwl9260_2ac_cfg)}, 
{IWL_PCI_DEVICE(0x2526, 0x1030, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x1210, iwl9260_2ac_cfg)}, @@ -603,7 +603,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2526, 0x401C, iwl9260_2ac_160_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x4030, iwl9560_2ac_160_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x4034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x42A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x6010, iwl9260_2ac_160_cfg)}, -- cgit v1.2.3 From 91cf5dede57f9c4030c1378745d612eec2075652 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 19 Oct 2019 13:03:29 +0300 Subject: iwlwifi: mvm: handle iwl_mvm_tvqm_enable_txq() error return iwl_mvm_tvqm_enable_txq() can return an error, notably if unable to allocate memory for the queue. Handle this error throughout, avoiding storing the invalid value into a u16 which later leads to a disable of an invalid queue ("queue 65524 not used", where 65524 is just -ENOMEM in a u16). 
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 140 ++++++++++++++++----------- 1 file changed, 83 insertions(+), 57 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 0bedba4c61f2..b3768d5d852a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1482,6 +1482,13 @@ static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm, mvm_sta->sta_id, i); txq_id = iwl_mvm_tvqm_enable_txq(mvm, mvm_sta->sta_id, i, wdg); + /* + * on failures, just set it to IWL_MVM_INVALID_QUEUE + * to try again later, we have no other good way of + * failing here + */ + if (txq_id < 0) + txq_id = IWL_MVM_INVALID_QUEUE; tid_data->txq_id = txq_id; /* @@ -1950,30 +1957,73 @@ void iwl_mvm_dealloc_int_sta(struct iwl_mvm *mvm, struct iwl_mvm_int_sta *sta) sta->sta_id = IWL_MVM_INVALID_STA; } -static void iwl_mvm_enable_aux_snif_queue(struct iwl_mvm *mvm, u16 *queue, +static void iwl_mvm_enable_aux_snif_queue(struct iwl_mvm *mvm, u16 queue, u8 sta_id, u8 fifo) { unsigned int wdg_timeout = iwlmvm_mod_params.tfd_q_hang_detect ? mvm->trans->trans_cfg->base_params->wd_timeout : IWL_WATCHDOG_DISABLED; + struct iwl_trans_txq_scd_cfg cfg = { + .fifo = fifo, + .sta_id = sta_id, + .tid = IWL_MAX_TID_COUNT, + .aggregate = false, + .frame_limit = IWL_FRAME_LIMIT, + }; + + WARN_ON(iwl_mvm_has_new_tx_api(mvm)); + + iwl_mvm_enable_txq(mvm, NULL, queue, 0, &cfg, wdg_timeout); +} + +static int iwl_mvm_enable_aux_snif_queue_tvqm(struct iwl_mvm *mvm, u8 sta_id) +{ + unsigned int wdg_timeout = iwlmvm_mod_params.tfd_q_hang_detect ? 
+ mvm->trans->trans_cfg->base_params->wd_timeout : + IWL_WATCHDOG_DISABLED; + + WARN_ON(!iwl_mvm_has_new_tx_api(mvm)); + + return iwl_mvm_tvqm_enable_txq(mvm, sta_id, IWL_MAX_TID_COUNT, + wdg_timeout); +} +static int iwl_mvm_add_int_sta_with_queue(struct iwl_mvm *mvm, int macidx, + int maccolor, + struct iwl_mvm_int_sta *sta, + u16 *queue, int fifo) +{ + int ret; + + /* Map queue to fifo - needs to happen before adding station */ + if (!iwl_mvm_has_new_tx_api(mvm)) + iwl_mvm_enable_aux_snif_queue(mvm, *queue, sta->sta_id, fifo); + + ret = iwl_mvm_add_int_sta_common(mvm, sta, NULL, macidx, maccolor); + if (ret) { + if (!iwl_mvm_has_new_tx_api(mvm)) + iwl_mvm_disable_txq(mvm, NULL, *queue, + IWL_MAX_TID_COUNT, 0); + return ret; + } + + /* + * For 22000 firmware and on we cannot add queue to a station unknown + * to firmware so enable queue here - after the station was added + */ if (iwl_mvm_has_new_tx_api(mvm)) { - int tvqm_queue = - iwl_mvm_tvqm_enable_txq(mvm, sta_id, - IWL_MAX_TID_COUNT, - wdg_timeout); - *queue = tvqm_queue; - } else { - struct iwl_trans_txq_scd_cfg cfg = { - .fifo = fifo, - .sta_id = sta_id, - .tid = IWL_MAX_TID_COUNT, - .aggregate = false, - .frame_limit = IWL_FRAME_LIMIT, - }; + int txq; - iwl_mvm_enable_txq(mvm, NULL, *queue, 0, &cfg, wdg_timeout); + txq = iwl_mvm_enable_aux_snif_queue_tvqm(mvm, sta->sta_id); + if (txq < 0) { + iwl_mvm_rm_sta_common(mvm, sta->sta_id); + return txq; + } + + *queue = txq; } + + return 0; } int iwl_mvm_add_aux_sta(struct iwl_mvm *mvm) @@ -1989,59 +2039,26 @@ int iwl_mvm_add_aux_sta(struct iwl_mvm *mvm) if (ret) return ret; - /* Map Aux queue to fifo - needs to happen before adding Aux station */ - if (!iwl_mvm_has_new_tx_api(mvm)) - iwl_mvm_enable_aux_snif_queue(mvm, &mvm->aux_queue, - mvm->aux_sta.sta_id, - IWL_MVM_TX_FIFO_MCAST); - - ret = iwl_mvm_add_int_sta_common(mvm, &mvm->aux_sta, NULL, - MAC_INDEX_AUX, 0); + ret = iwl_mvm_add_int_sta_with_queue(mvm, MAC_INDEX_AUX, 0, + &mvm->aux_sta, &mvm->aux_queue, + 
IWL_MVM_TX_FIFO_MCAST); if (ret) { iwl_mvm_dealloc_int_sta(mvm, &mvm->aux_sta); return ret; } - /* - * For 22000 firmware and on we cannot add queue to a station unknown - * to firmware so enable queue here - after the station was added - */ - if (iwl_mvm_has_new_tx_api(mvm)) - iwl_mvm_enable_aux_snif_queue(mvm, &mvm->aux_queue, - mvm->aux_sta.sta_id, - IWL_MVM_TX_FIFO_MCAST); - return 0; } int iwl_mvm_add_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - int ret; lockdep_assert_held(&mvm->mutex); - /* Map snif queue to fifo - must happen before adding snif station */ - if (!iwl_mvm_has_new_tx_api(mvm)) - iwl_mvm_enable_aux_snif_queue(mvm, &mvm->snif_queue, - mvm->snif_sta.sta_id, + return iwl_mvm_add_int_sta_with_queue(mvm, mvmvif->id, mvmvif->color, + &mvm->snif_sta, &mvm->snif_queue, IWL_MVM_TX_FIFO_BE); - - ret = iwl_mvm_add_int_sta_common(mvm, &mvm->snif_sta, vif->addr, - mvmvif->id, 0); - if (ret) - return ret; - - /* - * For 22000 firmware and on we cannot add queue to a station unknown - * to firmware so enable queue here - after the station was added - */ - if (iwl_mvm_has_new_tx_api(mvm)) - iwl_mvm_enable_aux_snif_queue(mvm, &mvm->snif_queue, - mvm->snif_sta.sta_id, - IWL_MVM_TX_FIFO_BE); - - return 0; } int iwl_mvm_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) @@ -2133,6 +2150,10 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) queue = iwl_mvm_tvqm_enable_txq(mvm, bsta->sta_id, IWL_MAX_TID_COUNT, wdg_timeout); + if (queue < 0) { + iwl_mvm_rm_sta_common(mvm, bsta->sta_id); + return queue; + } if (vif->type == NL80211_IFTYPE_AP || vif->type == NL80211_IFTYPE_ADHOC) @@ -2307,10 +2328,8 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) } ret = iwl_mvm_add_int_sta_common(mvm, msta, maddr, mvmvif->id, mvmvif->color); - if (ret) { - iwl_mvm_dealloc_int_sta(mvm, msta); - return ret; - } + if (ret) + goto err; /* * 
Enable cab queue after the ADD_STA command is sent. @@ -2323,6 +2342,10 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) int queue = iwl_mvm_tvqm_enable_txq(mvm, msta->sta_id, 0, timeout); + if (queue < 0) { + ret = queue; + goto err; + } mvmvif->cab_queue = queue; } else if (!fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_STA_TYPE)) @@ -2330,6 +2353,9 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) timeout); return 0; +err: + iwl_mvm_dealloc_int_sta(mvm, msta); + return ret; } static int __iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, u8 sta_id, -- cgit v1.2.3 From 9a47cb988338796b70c544919a8b6ba1f2245edb Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Sat, 19 Oct 2019 13:03:30 +0300 Subject: iwlwifi: pcie: add workaround for power gating in integrated 22000 Add a workaround that forces power gating to be enabled on integrated 22000 devices. This improves power saving in certain situations. Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/iwl-csr.h | 1 + drivers/net/wireless/intel/iwlwifi/iwl-prph.h | 5 +++++ .../net/wireless/intel/iwlwifi/pcie/trans-gen2.c | 25 ++++++++++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index cb4c5514a556..695bbaa86273 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -279,6 +279,7 @@ * Indicates MAC is entering a power-saving sleep power-down. * Not a good time to access device-internal resources. 
*/ +#define CSR_GP_CNTRL_REG_FLAG_INIT_DONE (0x00000004) #define CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP (0x00000010) #define CSR_GP_CNTRL_REG_FLAG_XTAL_ON (0x00000400) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h index f47e0f97acf8..23c25a7665f2 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h @@ -449,6 +449,11 @@ enum { #define PERSISTENCE_BIT BIT(12) #define PREG_WFPM_ACCESS BIT(12) +#define HPM_HIPM_GEN_CFG 0xA03458 +#define HPM_HIPM_GEN_CFG_CR_PG_EN BIT(0) +#define HPM_HIPM_GEN_CFG_CR_SLP_EN BIT(1) +#define HPM_HIPM_GEN_CFG_CR_FORCE_ACTIVE BIT(10) + #define UREG_DOORBELL_TO_ISR6 0xA05C04 #define UREG_DOORBELL_TO_ISR6_NMI_BIT BIT(0) #define UREG_DOORBELL_TO_ISR6_SUSPEND BIT(18) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c index df8455f14e4d..ca3bb4d65b00 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c @@ -57,6 +57,24 @@ #include "internal.h" #include "fw/dbg.h" +static int iwl_pcie_gen2_force_power_gating(struct iwl_trans *trans) +{ + iwl_set_bits_prph(trans, HPM_HIPM_GEN_CFG, + HPM_HIPM_GEN_CFG_CR_FORCE_ACTIVE); + udelay(20); + iwl_set_bits_prph(trans, HPM_HIPM_GEN_CFG, + HPM_HIPM_GEN_CFG_CR_PG_EN | + HPM_HIPM_GEN_CFG_CR_SLP_EN); + udelay(20); + iwl_clear_bits_prph(trans, HPM_HIPM_GEN_CFG, + HPM_HIPM_GEN_CFG_CR_FORCE_ACTIVE); + + iwl_trans_sw_reset(trans); + iwl_clear_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE); + + return 0; +} + /* * Start up NIC's basic functionality after it has been reset * (e.g. 
after platform boot, or shutdown via iwl_pcie_apm_stop()) @@ -92,6 +110,13 @@ int iwl_pcie_gen2_apm_init(struct iwl_trans *trans) iwl_pcie_apm_config(trans); + if (trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_22000 && + trans->cfg->integrated) { + ret = iwl_pcie_gen2_force_power_gating(trans); + if (ret) + return ret; + } + ret = iwl_finish_nic_init(trans, trans->trans_cfg); if (ret) return ret; -- cgit v1.2.3 From 17c216ed6b9eef34e647192063f6149d33eff579 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Sat, 19 Oct 2019 13:03:31 +0300 Subject: iwlwifi: pcie: 0x2720 is qu and 0x30DC is not When converting the wrong qu configurations in an earlier commit, I accidentally swapped 0x2720 and 0x30DC. Instead of converting 0x2720, I converted 0x30DC. Undo 0x30DC and convert 0x2720. Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 109 +++++++++++++------------- 1 file changed, 55 insertions(+), 54 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 03568f18a171..040cec17d3ad 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -618,60 +618,61 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x271B, 0x0210, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x271B, 0x0214, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x271C, 0x0214, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2720, 0x0034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x0038, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x003C, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x0060, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x0064, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x00A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x00A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x0230, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2720, 0x0234, iwl9560_2ac_cfg)}, - 
{IWL_PCI_DEVICE(0x2720, 0x0238, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2720, 0x023C, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2720, 0x0260, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x0264, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x02A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x02A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x1010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2720, 0x1030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x1210, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2720, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x2030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x2034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x4030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x4034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x40A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x4234, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x42A4, iwl9462_2ac_cfg_soc)}, - - {IWL_PCI_DEVICE(0x30DC, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x02A0, 
iwl9462_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x30DC, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + + {IWL_PCI_DEVICE(0x2720, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x2030, 
iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + + {IWL_PCI_DEVICE(0x30DC, 0x0030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x0034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x0038, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x003C, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x0064, iwl9461_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x00A0, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x00A4, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x0230, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x0234, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x0238, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x023C, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x0260, iwl9461_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x0264, iwl9461_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x02A0, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x02A4, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x1010, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x30DC, 0x1030, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x1210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x30DC, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x1552, iwl9560_killer_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x2030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x2034, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x4030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x4034, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x40A4, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x4234, 
iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x42A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_160_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x0034, iwl9560_2ac_cfg_shared_clk)}, -- cgit v1.2.3 From b43f4a169f220e459edf3ea8f8cd3ec4ae7fa82d Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sun, 20 Oct 2019 19:56:58 -0500 Subject: rtlwifi: rtl_pci: Fix problem of too small skb->len In commit 8020919a9b99 ("mac80211: Properly handle SKB with radiotap only"), buffers whose length is too short cause a WARN_ON(1) to be executed. This change exposed a fault in rtlwifi drivers, which is fixed by regarding packets with skb->len <= FCS_LEN as though they are in error and dropping them. The test is now annotated as likely. Cc: Stable # v5.0+ Signed-off-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 6087ec7a90a6..f88d26535978 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -822,7 +822,7 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) hdr = rtl_get_hdr(skb); fc = rtl_get_fc(skb); - if (!stats.crc && !stats.hwerror) { + if (!stats.crc && !stats.hwerror && (skb->len > FCS_LEN)) { memcpy(IEEE80211_SKB_RXCB(skb), &rx_status, sizeof(rx_status)); @@ -859,6 +859,7 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) _rtl_pci_rx_to_mac80211(hw, skb, rx_status); } } else { + /* drop packets with errors or those too short */ dev_kfree_skb_any(skb); } new_trx_end: -- cgit v1.2.3 From daf61b026f4686250e6afa619e6d7b49edc61df7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Oct 2019 11:03:15 +0200 Subject: netfilter: nf_flow_table: set timeout before insertion into hashes Other garbage collector might remove an entry not fully set up yet. 
[570953.958293] RIP: 0010:memcmp+0x9/0x50 [...] [570953.958567] flow_offload_hash_cmp+0x1e/0x30 [nf_flow_table] [570953.958585] flow_offload_lookup+0x8c/0x110 [nf_flow_table] [570953.958606] nf_flow_offload_ip_hook+0x135/0xb30 [nf_flow_table] [570953.958624] nf_flow_offload_inet_hook+0x35/0x37 [nf_flow_table_inet] [570953.958646] nf_hook_slow+0x3c/0xb0 [570953.958664] __netif_receive_skb_core+0x90f/0xb10 [570953.958678] ? ip_rcv_finish+0x82/0xa0 [570953.958692] __netif_receive_skb_one_core+0x3b/0x80 [570953.958711] __netif_receive_skb+0x18/0x60 [570953.958727] netif_receive_skb_internal+0x45/0xf0 [570953.958741] napi_gro_receive+0xcd/0xf0 [570953.958764] ixgbe_clean_rx_irq+0x432/0xe00 [ixgbe] [570953.958782] ixgbe_poll+0x27b/0x700 [ixgbe] [570953.958796] net_rx_action+0x284/0x3c0 [570953.958817] __do_softirq+0xcc/0x27c [570953.959464] irq_exit+0xe8/0x100 [570953.960097] do_IRQ+0x59/0xe0 [570953.960734] common_interrupt+0xf/0xf Fixes: 43c8f131184f ("netfilter: nf_flow_table: fix missing error check for rhashtable_insert_fast") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 132f5228b431..128245efe84a 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -202,6 +202,8 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) { int err; + flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; + err = rhashtable_insert_fast(&flow_table->rhashtable, &flow->tuplehash[0].node, nf_flow_offload_rhash_params); @@ -218,7 +220,6 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) return err; } - flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; return 0; } EXPORT_SYMBOL_GPL(flow_offload_add); -- cgit v1.2.3 From 085461c8976e6cb4d5b608a7b7062f394c51a253 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 18 Oct 2019 
14:10:31 +0200 Subject: netfilter: nf_tables_offload: restore basechain deletion Unbind callbacks on chain deletion. Fixes: 8fc618c52d16 ("netfilter: nf_tables_offload: refactor the nft_flow_offload_chain function") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index e546f759b7a7..ad783f4840ef 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -347,7 +347,7 @@ int nft_flow_rule_offload_commit(struct net *net) policy = nft_trans_chain_policy(trans); err = nft_flow_offload_chain(trans->ctx.chain, &policy, - FLOW_BLOCK_BIND); + FLOW_BLOCK_UNBIND); break; case NFT_MSG_NEWRULE: if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) -- cgit v1.2.3 From b24e7598db62386a95a3c8b9c75630c5d56fe077 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 23 Oct 2019 14:26:37 +0200 Subject: fuse: flush dirty data/metadata before non-truncate setattr If writeback cache is enabled, then writes might get reordered with chmod/chown/utimes. The problem with this is that performing the write in the fuse daemon might itself change some of these attributes. In such case the following sequence of operations will result in file ending up with the wrong mode, for example: int fd = open ("suid", O_WRONLY|O_CREAT|O_EXCL); write (fd, "1", 1); fchown (fd, 0, 0); fchmod (fd, 04755); close (fd); This patch fixes this by flushing pending writes before performing chown/chmod/utimes. 
Reported-by: Giuseppe Scrivano Tested-by: Giuseppe Scrivano Fixes: 4d99ff8f12eb ("fuse: Turn writeback cache on") Cc: <stable@vger.kernel.org> # v3.15+ Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b77954a27538..54d638f9ba1c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1522,6 +1522,19 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, is_truncate = true; } + /* Flush dirty data/metadata before non-truncate SETATTR */ + if (is_wb && S_ISREG(inode->i_mode) && + attr->ia_valid & + (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET | + ATTR_TIMES_SET)) { + err = write_inode_now(inode, true); + if (err) + return err; + + fuse_set_nowrite(inode); + fuse_release_nowrite(inode); + } + if (is_truncate) { fuse_set_nowrite(inode); set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); -- cgit v1.2.3 From e4648309b85a78f8c787457832269a8712a8673e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 23 Oct 2019 14:26:37 +0200 Subject: fuse: truncate pending writes on O_TRUNC Make sure cached writes are not reordered around open(..., O_TRUNC), with the obvious wrong results.
Fixes: 4d99ff8f12eb ("fuse: Turn writeback cache on") Cc: <stable@vger.kernel.org> # v3.15+ Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 0f0225686aee..6edf949b9139 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -217,7 +217,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) { struct fuse_conn *fc = get_fuse_conn(inode); int err; - bool lock_inode = (file->f_flags & O_TRUNC) && + bool is_wb_truncate = (file->f_flags & O_TRUNC) && fc->atomic_o_trunc && fc->writeback_cache; @@ -225,16 +225,20 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) if (err) return err; - if (lock_inode) + if (is_wb_truncate) { inode_lock(inode); + fuse_set_nowrite(inode); + } err = fuse_do_open(fc, get_node_id(inode), file, isdir); if (!err) fuse_finish_open(inode, file); - if (lock_inode) + if (is_wb_truncate) { + fuse_release_nowrite(inode); inode_unlock(inode); + } return err; } -- cgit v1.2.3 From 9de55a37fcc5f1550a743910f493197223f5e384 Mon Sep 17 00:00:00 2001 From: Alan Somers Date: Mon, 19 Aug 2019 11:10:30 -0600 Subject: fuse: Add changelog entries for protocols 7.1 - 7.8 Retroactively add changelog entry for FUSE protocols 7.1 through 7.8.
Signed-off-by: Alan Somers Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 802b0377a49e..373cada89815 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -38,6 +38,43 @@ * * Protocol changelog: * + * 7.1: + * - add the following messages: + * FUSE_SETATTR, FUSE_SYMLINK, FUSE_MKNOD, FUSE_MKDIR, FUSE_UNLINK, + * FUSE_RMDIR, FUSE_RENAME, FUSE_LINK, FUSE_OPEN, FUSE_READ, FUSE_WRITE, + * FUSE_RELEASE, FUSE_FSYNC, FUSE_FLUSH, FUSE_SETXATTR, FUSE_GETXATTR, + * FUSE_LISTXATTR, FUSE_REMOVEXATTR, FUSE_OPENDIR, FUSE_READDIR, + * FUSE_RELEASEDIR + * - add padding to messages to accommodate 32-bit servers on 64-bit kernels + * + * 7.2: + * - add FOPEN_DIRECT_IO and FOPEN_KEEP_CACHE flags + * - add FUSE_FSYNCDIR message + * + * 7.3: + * - add FUSE_ACCESS message + * - add FUSE_CREATE message + * - add filehandle to fuse_setattr_in + * + * 7.4: + * - add frsize to fuse_kstatfs + * - clean up request size limit checking + * + * 7.5: + * - add flags and max_write to fuse_init_out + * + * 7.6: + * - add max_readahead to fuse_init_in and fuse_init_out + * + * 7.7: + * - add FUSE_INTERRUPT message + * - add POSIX file lock support + * + * 7.8: + * - add lock_owner and flags fields to fuse_release_in + * - add FUSE_BMAP message + * - add FUSE_DESTROY message + * * 7.9: * - new fuse_getattr_in input argument of GETATTR * - add lk_flags in fuse_lk_in -- cgit v1.2.3 From 091d1a7267726ba162b12ce9332d76cdae602789 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 19 Aug 2019 08:48:26 +0300 Subject: fuse: redundant get_fuse_inode() calls in fuse_writepages_fill() Currently fuse_writepages_fill() calls get_fuse_inode() few times with the same argument. 
Signed-off-by: Vasily Averin Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 6edf949b9139..db48a5cf8620 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2001,7 +2001,7 @@ static int fuse_writepages_fill(struct page *page, if (!data->ff) { err = -EIO; - data->ff = fuse_write_file_get(fc, get_fuse_inode(inode)); + data->ff = fuse_write_file_get(fc, fi); if (!data->ff) goto out_unlock; } @@ -2046,8 +2046,6 @@ static int fuse_writepages_fill(struct page *page, * under writeback, so we can release the page lock. */ if (data->wpa == NULL) { - struct fuse_inode *fi = get_fuse_inode(inode); - err = -ENOMEM; wpa = fuse_writepage_args_alloc(); if (!wpa) { -- cgit v1.2.3 From bacdcb6675e170bb2e8d3824da220e10274f42a7 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 23 Oct 2019 08:31:38 -0700 Subject: dmaengine: cppi41: Fix cppi41_dma_prep_slave_sg() when idle Yegor Yefremov reported that musb and ftdi uart can fail for the first open of the uart unless connected using a hub. This is because the first dma call done by musb_ep_program() must wait if cppi41 is PM runtime suspended. Otherwise musb_ep_program() continues with other non-dma packets before the DMA transfer is started causing at least ftdi uarts to fail to receive data. Let's fix the issue by waking up cppi41 with PM runtime calls added to cppi41_dma_prep_slave_sg() and return NULL if still idled. This way we have musb_ep_program() continue with PIO until cppi41 is awake. 
Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") Reported-by: Yegor Yefremov Signed-off-by: Tony Lindgren Cc: stable@vger.kernel.org # v4.9+ Link: https://lore.kernel.org/r/20191023153138.23442-1-tony@atomide.com Signed-off-by: Vinod Koul --- drivers/dma/ti/cppi41.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/dma/ti/cppi41.c b/drivers/dma/ti/cppi41.c index 2f946f55076c..8c2f7ebe998c 100644 --- a/drivers/dma/ti/cppi41.c +++ b/drivers/dma/ti/cppi41.c @@ -586,9 +586,22 @@ static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg( enum dma_transfer_direction dir, unsigned long tx_flags, void *context) { struct cppi41_channel *c = to_cpp41_chan(chan); + struct dma_async_tx_descriptor *txd = NULL; + struct cppi41_dd *cdd = c->cdd; struct cppi41_desc *d; struct scatterlist *sg; unsigned int i; + int error; + + error = pm_runtime_get(cdd->ddev.dev); + if (error < 0) { + pm_runtime_put_noidle(cdd->ddev.dev); + + return NULL; + } + + if (cdd->is_suspended) + goto err_out_not_ready; d = c->desc; for_each_sg(sgl, sg, sg_len, i) { @@ -611,7 +624,13 @@ static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg( d++; } - return &c->txd; + txd = &c->txd; + +err_out_not_ready: + pm_runtime_mark_last_busy(cdd->ddev.dev); + pm_runtime_put_autosuspend(cdd->ddev.dev); + + return txd; } static void cppi41_compute_td_desc(struct cppi41_desc *d) -- cgit v1.2.3 From faac3604d05e8015567124e5ee79edc3f1568a89 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 2 Oct 2019 10:53:09 +0200 Subject: clk: samsung: exynos5433: Fix error paths Add checking the value returned by samsung_clk_alloc_reg_dump() and devm_kcalloc(). While fixing this, also release all gathered clocks. Fixes: 523d3de41f02 ("clk: samsung: exynos5433: Add support for runtime PM") Signed-off-by: Marek Szyprowski Reviewed-by: Krzysztof Kozlowski Acked-by: Chanwoo Choi [s.nawrocki: squashed patch from K. 
Kozlowski adding missing slab.h header] Reported-by: kbuild test robot Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sylwester Nawrocki --- drivers/clk/samsung/clk-exynos5433.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos5433.c b/drivers/clk/samsung/clk-exynos5433.c index 7824c2ba3d8e..4b1aa9382ad2 100644 --- a/drivers/clk/samsung/clk-exynos5433.c +++ b/drivers/clk/samsung/clk-exynos5433.c @@ -13,6 +13,7 @@ #include #include #include +#include <linux/slab.h> #include @@ -5584,6 +5585,8 @@ static int __init exynos5433_cmu_probe(struct platform_device *pdev) data->clk_save = samsung_clk_alloc_reg_dump(info->clk_regs, info->nr_clk_regs); + if (!data->clk_save) + return -ENOMEM; data->nr_clk_save = info->nr_clk_regs; data->clk_suspend = info->suspend_regs; data->nr_clk_suspend = info->nr_suspend_regs; @@ -5592,12 +5595,19 @@ static int __init exynos5433_cmu_probe(struct platform_device *pdev) if (data->nr_pclks > 0) { data->pclks = devm_kcalloc(dev, sizeof(struct clk *), data->nr_pclks, GFP_KERNEL); - + if (!data->pclks) { + kfree(data->clk_save); + return -ENOMEM; + } for (i = 0; i < data->nr_pclks; i++) { struct clk *clk = of_clk_get(dev->of_node, i); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + kfree(data->clk_save); + while (--i >= 0) + clk_put(data->pclks[i]); return PTR_ERR(clk); + } data->pclks[i] = clk; } } -- cgit v1.2.3 From c9f7567aff31348a3dcf54845f7e389f5df0c0c1 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 23 Oct 2019 09:41:18 +0200 Subject: clk: samsung: exynos542x: Move G3D subsystem clocks to its sub-CMU G3D clocks require special handling of their parent bus clock during power domain on/off sequences. Those clocks were not initially added to the sub-CMU handler, because that time there was no open-source driver for the G3D (MALI Panfrost) hardware module and it was not possible to test it. This patch fixes this issue.
Parent clock for G3D hardware block is now properly preserved during G3D power domain on/off sequence. This restores proper MALI Panfrost performance broken by commit 8686764fc071 ("ARM: dts: exynos: Add G3D power domain to Exynos542x"). Reported-by: Marian Mihailescu Fixes: b06a532bf1fa ("clk: samsung: Add Exynos5 sub-CMU clock driver") Signed-off-by: Marek Szyprowski Tested-by: Marian Mihailescu Acked-by: Krzysztof Kozlowski Signed-off-by: Sylwester Nawrocki --- drivers/clk/samsung/clk-exynos5420.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c index 7670cc596c74..dfa862d55246 100644 --- a/drivers/clk/samsung/clk-exynos5420.c +++ b/drivers/clk/samsung/clk-exynos5420.c @@ -1172,8 +1172,6 @@ static const struct samsung_gate_clock exynos5x_gate_clks[] __initconst = { GATE(CLK_SCLK_ISP_SENSOR2, "sclk_isp_sensor2", "dout_isp_sensor2", GATE_TOP_SCLK_ISP, 12, CLK_SET_RATE_PARENT, 0), - GATE(CLK_G3D, "g3d", "mout_user_aclk_g3d", GATE_IP_G3D, 9, 0, 0), - /* CDREX */ GATE(CLK_CLKM_PHY0, "clkm_phy0", "dout_sclk_cdrex", GATE_BUS_CDREX0, 0, 0, 0), @@ -1248,6 +1246,15 @@ static struct exynos5_subcmu_reg_dump exynos5x_gsc_suspend_regs[] = { { DIV2_RATIO0, 0, 0x30 }, /* DIV dout_gscl_blk_300 */ }; +static const struct samsung_gate_clock exynos5x_g3d_gate_clks[] __initconst = { + GATE(CLK_G3D, "g3d", "mout_user_aclk_g3d", GATE_IP_G3D, 9, 0, 0), +}; + +static struct exynos5_subcmu_reg_dump exynos5x_g3d_suspend_regs[] = { + { GATE_IP_G3D, 0x3ff, 0x3ff }, /* G3D gates */ + { SRC_TOP5, 0, BIT(16) }, /* MUX mout_user_aclk_g3d */ +}; + static const struct samsung_div_clock exynos5x_mfc_div_clks[] __initconst = { DIV(0, "dout_mfc_blk", "mout_user_aclk333", DIV4_RATIO, 0, 2), }; @@ -1320,6 +1327,14 @@ static const struct exynos5_subcmu_info exynos5x_gsc_subcmu = { .pd_name = "GSC", }; +static const struct exynos5_subcmu_info exynos5x_g3d_subcmu = { + .gate_clks = 
exynos5x_g3d_gate_clks, + .nr_gate_clks = ARRAY_SIZE(exynos5x_g3d_gate_clks), + .suspend_regs = exynos5x_g3d_suspend_regs, + .nr_suspend_regs = ARRAY_SIZE(exynos5x_g3d_suspend_regs), + .pd_name = "G3D", +}; + static const struct exynos5_subcmu_info exynos5x_mfc_subcmu = { .div_clks = exynos5x_mfc_div_clks, .nr_div_clks = ARRAY_SIZE(exynos5x_mfc_div_clks), @@ -1351,6 +1366,7 @@ static const struct exynos5_subcmu_info exynos5800_mau_subcmu = { static const struct exynos5_subcmu_info *exynos5x_subcmus[] = { &exynos5x_disp_subcmu, &exynos5x_gsc_subcmu, + &exynos5x_g3d_subcmu, &exynos5x_mfc_subcmu, &exynos5x_mscl_subcmu, }; @@ -1358,6 +1374,7 @@ static const struct exynos5_subcmu_info *exynos5x_subcmus[] = { static const struct exynos5_subcmu_info *exynos5800_subcmus[] = { &exynos5x_disp_subcmu, &exynos5x_gsc_subcmu, + &exynos5x_g3d_subcmu, &exynos5x_mfc_subcmu, &exynos5x_mscl_subcmu, &exynos5800_mau_subcmu, -- cgit v1.2.3 From 4523817d51bc3b2ef38da768d004fda2c8bc41de Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 23 Oct 2019 14:46:44 +0100 Subject: ASoC: kirkwood: fix external clock probe defer When our call to get the external clock fails, we forget to clean up the enabled internal clock correctly. Enable the clock after we have obtained all our resources. 
Fixes: 84aac6c79bfd ("ASoC: kirkwood: fix loss of external clock at probe time") Signed-off-by: Russell King Link: https://lore.kernel.org/r/E1iNGyK-0004oF-6A@rmk-PC.armlinux.org.uk Signed-off-by: Mark Brown --- sound/soc/kirkwood/kirkwood-i2s.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/kirkwood/kirkwood-i2s.c b/sound/soc/kirkwood/kirkwood-i2s.c index 61226fefe1c4..9575a636d016 100644 --- a/sound/soc/kirkwood/kirkwood-i2s.c +++ b/sound/soc/kirkwood/kirkwood-i2s.c @@ -555,10 +555,6 @@ static int kirkwood_i2s_dev_probe(struct platform_device *pdev) return PTR_ERR(priv->clk); } - err = clk_prepare_enable(priv->clk); - if (err < 0) - return err; - priv->extclk = devm_clk_get(&pdev->dev, "extclk"); if (IS_ERR(priv->extclk)) { if (PTR_ERR(priv->extclk) == -EPROBE_DEFER) @@ -574,6 +570,10 @@ static int kirkwood_i2s_dev_probe(struct platform_device *pdev) } } + err = clk_prepare_enable(priv->clk); + if (err < 0) + return err; + /* Some sensible defaults - this reflects the powerup values */ priv->ctl_play = KIRKWOOD_PLAYCTL_SIZE_24; priv->ctl_rec = KIRKWOOD_RECCTL_SIZE_24; -- cgit v1.2.3 From 901af18b6baade6a327e532427cbb233f4945f5d Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 23 Oct 2019 18:12:02 +0200 Subject: ASoC: hdmi-codec: drop mutex locking again This reverts commit eb1ecadb7f67dde94ef0efd3ddaed5cb6c9a65ed. This fixes the following warning reported by lockdep and a potential issue with hibernation ==================================== WARNING: pulseaudio/1297 still has locks held! 
5.3.0+ #1826 Not tainted ------------------------------------ 1 lock held by pulseaudio/1297: #0: ee815308 (&hcp->lock){....}, at: hdmi_codec_startup+0x20/0x130 stack backtrace: CPU: 0 PID: 1297 Comm: pulseaudio Not tainted 5.3.0+ #1826 Hardware name: Marvell Dove (Cubox) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (futex_wait_queue_me+0x13c/0x19c) [] (futex_wait_queue_me) from [] (futex_wait+0x184/0x24c) [] (futex_wait) from [] (do_futex+0x334/0x598) [] (do_futex) from [] (sys_futex_time32+0x118/0x180) [] (sys_futex_time32) from [] (ret_fast_syscall+0x0/0x54) Exception stack(0xebd31fa8 to 0xebd31ff0) 1fa0: 00000000 ffffffff 000c8748 00000189 00000001 00000000 1fc0: 00000000 ffffffff 00000000 000000f0 00000000 00000000 00000000 00056200 1fe0: 000000f0 beac03a8 b6d6c835 b6d6f456 Fixes: eb1ecadb7f67 ("ASoC: hdmi-codec: re-introduce mutex locking") Reported-by: Russell King Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20191023161203.28955-2-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/codecs/hdmi-codec.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index b5fd8f08726e..f8b5b960e597 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -274,7 +274,7 @@ struct hdmi_codec_priv { uint8_t eld[MAX_ELD_BYTES]; struct snd_pcm_chmap *chmap_info; unsigned int chmap_idx; - struct mutex lock; + unsigned long busy; struct snd_soc_jack *jack; unsigned int jack_status; }; @@ -390,8 +390,8 @@ static int hdmi_codec_startup(struct snd_pcm_substream *substream, struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai); int ret = 0; - ret = mutex_trylock(&hcp->lock); - if (!ret) { + ret = test_and_set_bit(0, &hcp->busy); + if (ret) { dev_err(dai->dev, "Only one simultaneous stream supported!\n"); return -EINVAL; } @@ -419,7 +419,7 @@ static int hdmi_codec_startup(struct snd_pcm_substream *substream, 
err: /* Release the exclusive lock on error */ - mutex_unlock(&hcp->lock); + clear_bit(0, &hcp->busy); return ret; } @@ -431,7 +431,7 @@ static void hdmi_codec_shutdown(struct snd_pcm_substream *substream, hcp->chmap_idx = HDMI_CODEC_CHMAP_IDX_UNKNOWN; hcp->hcd.ops->audio_shutdown(dai->dev->parent, hcp->hcd.data); - mutex_unlock(&hcp->lock); + clear_bit(0, &hcp->busy); } static int hdmi_codec_hw_params(struct snd_pcm_substream *substream, @@ -811,8 +811,6 @@ static int hdmi_codec_probe(struct platform_device *pdev) return -ENOMEM; hcp->hcd = *hcd; - mutex_init(&hcp->lock); - daidrv = devm_kcalloc(dev, dai_count, sizeof(*daidrv), GFP_KERNEL); if (!daidrv) return -ENOMEM; -- cgit v1.2.3 From d10be65f87fc9d98ad3cbdc406e86745fe8c59e2 Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Tue, 22 Oct 2019 20:54:29 +0200 Subject: ASoC: rsnd: dma: fix SSI9 4/5/6/7 busif dma address Currently each SSI unit's busif dma address is calculated by following calculation formula: 0xec540000 + 0x1000 * id + busif / 4 * 0xA000 + busif % 4 * 0x400 But according to R-Car3 HW manual 41.1.4 Register Configuration, ssi9 4/5/6/7 busif data register address (SSI9_4_BUSIF/SSI9_5_BUSIF/SSI9_6_BUSIF/SSI9_7_BUSIF) are out of this rule. This patch updates the calculation formula to correct ssi9 4/5/6/7 busif data register address. 
Fixes: 5e45a6fab3b9 ("ASoc: rsnd: dma: Calculate dma address with consider of BUSIF") Signed-off-by: Jiada Wang Signed-off-by: Timo Wischer [erosca: minor improvements in commit description] Cc: Andrew Gabbasov Cc: stable@vger.kernel.org # v4.20+ Signed-off-by: Eugeniu Rosca Acked-by: Kuninori Morimoto Link: https://lore.kernel.org/r/20191022185429.12769-1-erosca@de.adit-jv.com Signed-off-by: Mark Brown --- sound/soc/sh/rcar/dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c index 0324a5c39619..28f65eba2bb4 100644 --- a/sound/soc/sh/rcar/dma.c +++ b/sound/soc/sh/rcar/dma.c @@ -508,10 +508,10 @@ static struct rsnd_mod_ops rsnd_dmapp_ops = { #define RDMA_SSI_I_N(addr, i) (addr ##_reg - 0x00300000 + (0x40 * i) + 0x8) #define RDMA_SSI_O_N(addr, i) (addr ##_reg - 0x00300000 + (0x40 * i) + 0xc) -#define RDMA_SSIU_I_N(addr, i, j) (addr ##_reg - 0x00441000 + (0x1000 * (i)) + (((j) / 4) * 0xA000) + (((j) % 4) * 0x400)) +#define RDMA_SSIU_I_N(addr, i, j) (addr ##_reg - 0x00441000 + (0x1000 * (i)) + (((j) / 4) * 0xA000) + (((j) % 4) * 0x400) - (0x4000 * ((i) / 9) * ((j) / 4))) #define RDMA_SSIU_O_N(addr, i, j) RDMA_SSIU_I_N(addr, i, j) -#define RDMA_SSIU_I_P(addr, i, j) (addr ##_reg - 0x00141000 + (0x1000 * (i)) + (((j) / 4) * 0xA000) + (((j) % 4) * 0x400)) +#define RDMA_SSIU_I_P(addr, i, j) (addr ##_reg - 0x00141000 + (0x1000 * (i)) + (((j) / 4) * 0xA000) + (((j) % 4) * 0x400) - (0x4000 * ((i) / 9) * ((j) / 4))) #define RDMA_SSIU_O_P(addr, i, j) RDMA_SSIU_I_P(addr, i, j) #define RDMA_SRC_I_N(addr, i) (addr ##_reg - 0x00500000 + (0x400 * i)) -- cgit v1.2.3 From dc39596a906d5b604f4e64597b6e904fc14625e8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 23 Oct 2019 14:46:49 +0100 Subject: ASoC: kirkwood: fix device remove ordering The devm conversion of kirkwood was incorrect; on removal, devm takes effect after the "remove" function has returned. 
So, the effect of the conversion was to change the order during remove from: - snd_soc_unregister_component() (unpublishes interfaces) - clk_disable_unprepare() - cleanup resources After the conversion, this became: - clk_disable_unprepare() - while the device may still be active - snd_soc_unregister_component() - cleanup resources Hence, it introduces a bug, where the internal clock for the device may be shut down before the device itself has been shut down. It is known that Marvell SoCs, including Dove, lock up if registers for a peripheral that has its clocks disabled are accessed. Fixes: f98fc0f8154e ("ASoC: kirkwood: replace platform to component") Signed-off-by: Russell King Link: https://lore.kernel.org/r/E1iNGyP-0004oN-BA@rmk-PC.armlinux.org.uk Signed-off-by: Mark Brown --- sound/soc/kirkwood/kirkwood-i2s.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/kirkwood/kirkwood-i2s.c b/sound/soc/kirkwood/kirkwood-i2s.c index 9575a636d016..2a4ffe945177 100644 --- a/sound/soc/kirkwood/kirkwood-i2s.c +++ b/sound/soc/kirkwood/kirkwood-i2s.c @@ -587,7 +587,7 @@ static int kirkwood_i2s_dev_probe(struct platform_device *pdev) priv->ctl_rec |= KIRKWOOD_RECCTL_BURST_128; } - err = devm_snd_soc_register_component(&pdev->dev, &kirkwood_soc_component, + err = snd_soc_register_component(&pdev->dev, &kirkwood_soc_component, soc_dai, 2); if (err) { dev_err(&pdev->dev, "snd_soc_register_component failed\n"); @@ -610,6 +610,7 @@ static int kirkwood_i2s_dev_remove(struct platform_device *pdev) { struct kirkwood_dma_data *priv = dev_get_drvdata(&pdev->dev); + snd_soc_unregister_component(&pdev->dev); if (!IS_ERR(priv->extclk)) clk_disable_unprepare(priv->extclk); clk_disable_unprepare(priv->clk); -- cgit v1.2.3 From 4cad2a574d342cd9b658db6e32e9024f559383ed Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Wed, 23 Oct 2019 14:21:57 +0200 Subject: panfrost: Properly undo pm_runtime_enable when deferring a probe When deferring the probe because of a
missing regulator, we were calling pm_runtime_disable even if pm_runtime_enable wasn't called. Move the call to pm_runtime_disable to the right place. Fixes: 635430797d3f ("drm/panfrost: Rework runtime PM initialization") Reported-by: Chen-Yu Tsai Cc: Robin Murphy Signed-off-by: Tomeu Vizoso Reviewed-by: Robin Murphy Reviewed-by: Steven Price Signed-off-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20191023122157.32067-1-tomeu.vizoso@collabora.com --- drivers/gpu/drm/panfrost/panfrost_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index bc2ddeb55f5d..f21bc8a7ee3a 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -556,11 +556,11 @@ static int panfrost_probe(struct platform_device *pdev) return 0; err_out2: + pm_runtime_disable(pfdev->dev); panfrost_devfreq_fini(pfdev); err_out1: panfrost_device_fini(pfdev); err_out0: - pm_runtime_disable(pfdev->dev); drm_dev_put(ddev); return err; } -- cgit v1.2.3 From 29cd13cfd7624726d9e6becbae9aa419ef35af7f Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Mon, 21 Oct 2019 13:52:49 -0500 Subject: drm/v3d: Fix memory leak in v3d_submit_cl_ioctl In the implementation of v3d_submit_cl_ioctl() there are two memory leaks. One is when allocation for bin fails, and the other is when bin initialization fails. If kcalloc fails to allocate memory for bin then render->base should be put. Also, if v3d_job_init() fails to initialize bin->base then allocated memory for bin should be released.
Fixes: a783a09ee76d ("drm/v3d: Refactor job management.") Signed-off-by: Navid Emamdoost Reviewed-by: Eric Anholt Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20191021185250.26130-1-navid.emamdoost@gmail.com --- drivers/gpu/drm/v3d/v3d_gem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 5d80507b539b..19c092d75266 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -557,13 +557,16 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, if (args->bcl_start != args->bcl_end) { bin = kcalloc(1, sizeof(*bin), GFP_KERNEL); - if (!bin) + if (!bin) { + v3d_job_put(&render->base); return -ENOMEM; + } ret = v3d_job_init(v3d, file_priv, &bin->base, v3d_job_free, args->in_sync_bcl); if (ret) { v3d_job_put(&render->base); + kfree(bin); return ret; } -- cgit v1.2.3 From 55667441c84fa5e0911a0aac44fb059c15ba6da2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Oct 2019 07:57:46 -0700 Subject: net/flow_dissector: switch to siphash UDP IPv6 packets auto flowlabels are using a 32bit secret (static u32 hashrnd in net/core/flow_dissector.c) and apply jhash() over fields known by the receivers. Attackers can easily infer the 32bit secret and use this information to identify a device and/or user, since this 32bit secret is only set at boot time. Really, using jhash() to generate cookies sent on the wire is a serious security concern. Trying to change the rol32(hash, 16) in ip6_make_flowlabel() would be a dead end. Trying to periodically change the secret (like in sch_sfq.c) could change paths taken in the network for long lived flows. Let's switch to siphash, as we did in commit df453700e8d8 ("inet: switch IP ID generator to siphash") Using a cryptographically strong pseudo random function will solve this privacy issue and more generally remove other weak points in the stack. 
Packet schedulers using skb_get_hash_perturb() benefit from this change. Fixes: b56774163f99 ("ipv6: Enable auto flow labels by default") Fixes: 42240901f7c4 ("ipv6: Implement different admin modes for automatic flow labels") Fixes: 67800f9b1f4e ("ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel") Fixes: cb1ce2ef387b ("ipv6: Implement automatic flow label generation on transmit") Signed-off-by: Eric Dumazet Reported-by: Jonathan Berger Reported-by: Amit Klein Reported-by: Benny Pinkas Cc: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 ++- include/net/flow_dissector.h | 3 ++- include/net/fq.h | 2 +- include/net/fq_impl.h | 4 ++-- net/core/flow_dissector.c | 38 ++++++++++++++++---------------------- net/sched/sch_hhf.c | 8 ++++---- net/sched/sch_sfb.c | 13 +++++++------ net/sched/sch_sfq.c | 14 ++++++++------ 8 files changed, 42 insertions(+), 43 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7914fdaf4226..a391147c03d4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1354,7 +1354,8 @@ static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 return skb->hash; } -__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb); +__u32 skb_get_hash_perturb(const struct sk_buff *skb, + const siphash_key_t *perturb); static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) { diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 90bd210be060..5cd12276ae21 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -4,6 +4,7 @@ #include #include +#include #include /** @@ -276,7 +277,7 @@ struct flow_keys_basic { struct flow_keys { struct flow_dissector_key_control control; #define FLOW_KEYS_HASH_START_FIELD basic - struct flow_dissector_key_basic basic; + struct flow_dissector_key_basic basic __aligned(SIPHASH_ALIGNMENT); struct flow_dissector_key_tags tags; struct flow_dissector_key_vlan vlan; 
struct flow_dissector_key_vlan cvlan; diff --git a/include/net/fq.h b/include/net/fq.h index d126b5d20261..2ad85e683041 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -69,7 +69,7 @@ struct fq { struct list_head backlogs; spinlock_t lock; u32 flows_cnt; - u32 perturbation; + siphash_key_t perturbation; u32 limit; u32 memory_limit; u32 memory_usage; diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index be40a4b327e3..107c0d700ed6 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -108,7 +108,7 @@ begin: static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb) { - u32 hash = skb_get_hash_perturb(skb, fq->perturbation); + u32 hash = skb_get_hash_perturb(skb, &fq->perturbation); return reciprocal_scale(hash, fq->flows_cnt); } @@ -308,7 +308,7 @@ static int fq_init(struct fq *fq, int flows_cnt) INIT_LIST_HEAD(&fq->backlogs); spin_lock_init(&fq->lock); fq->flows_cnt = max_t(u32, flows_cnt, 1); - fq->perturbation = prandom_u32(); + get_random_bytes(&fq->perturbation, sizeof(fq->perturbation)); fq->quantum = 300; fq->limit = 8192; fq->memory_limit = 16 << 20; /* 16 MBytes */ diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 7c09d87d3269..68eda10d0680 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1350,30 +1350,21 @@ out_bad: } EXPORT_SYMBOL(__skb_flow_dissect); -static u32 hashrnd __read_mostly; +static siphash_key_t hashrnd __read_mostly; static __always_inline void __flow_hash_secret_init(void) { net_get_random_once(&hashrnd, sizeof(hashrnd)); } -static __always_inline u32 __flow_hash_words(const u32 *words, u32 length, - u32 keyval) +static const void *flow_keys_hash_start(const struct flow_keys *flow) { - return jhash2(words, length, keyval); -} - -static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow) -{ - const void *p = flow; - - BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32)); - return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET); + 
BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % SIPHASH_ALIGNMENT); + return &flow->FLOW_KEYS_HASH_START_FIELD; } static inline size_t flow_keys_hash_length(const struct flow_keys *flow) { size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs); - BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32)); BUILD_BUG_ON(offsetof(typeof(*flow), addrs) != sizeof(*flow) - sizeof(flow->addrs)); @@ -1388,7 +1379,7 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow) diff -= sizeof(flow->addrs.tipckey); break; } - return (sizeof(*flow) - diff) / sizeof(u32); + return sizeof(*flow) - diff; } __be32 flow_get_u32_src(const struct flow_keys *flow) @@ -1454,14 +1445,15 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) } } -static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) +static inline u32 __flow_hash_from_keys(struct flow_keys *keys, + const siphash_key_t *keyval) { u32 hash; __flow_hash_consistentify(keys); - hash = __flow_hash_words(flow_keys_hash_start(keys), - flow_keys_hash_length(keys), keyval); + hash = siphash(flow_keys_hash_start(keys), + flow_keys_hash_length(keys), keyval); if (!hash) hash = 1; @@ -1471,12 +1463,13 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) u32 flow_hash_from_keys(struct flow_keys *keys) { __flow_hash_secret_init(); - return __flow_hash_from_keys(keys, hashrnd); + return __flow_hash_from_keys(keys, &hashrnd); } EXPORT_SYMBOL(flow_hash_from_keys); static inline u32 ___skb_get_hash(const struct sk_buff *skb, - struct flow_keys *keys, u32 keyval) + struct flow_keys *keys, + const siphash_key_t *keyval) { skb_flow_dissect_flow_keys(skb, keys, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); @@ -1524,7 +1517,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb) &keys, NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); - return __flow_hash_from_keys(&keys, hashrnd); + return __flow_hash_from_keys(&keys, &hashrnd); } 
EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); @@ -1544,13 +1537,14 @@ void __skb_get_hash(struct sk_buff *skb) __flow_hash_secret_init(); - hash = ___skb_get_hash(skb, &keys, hashrnd); + hash = ___skb_get_hash(skb, &keys, &hashrnd); __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); } EXPORT_SYMBOL(__skb_get_hash); -__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) +__u32 skb_get_hash_perturb(const struct sk_buff *skb, + const siphash_key_t *perturb) { struct flow_keys keys; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 23cd1c873a2c..be35f03b657b 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -5,11 +5,11 @@ * Copyright (C) 2013 Nandita Dukkipati */ -#include #include #include #include #include +#include #include #include @@ -126,7 +126,7 @@ struct wdrr_bucket { struct hhf_sched_data { struct wdrr_bucket buckets[WDRR_BUCKET_CNT]; - u32 perturbation; /* hash perturbation */ + siphash_key_t perturbation; /* hash perturbation */ u32 quantum; /* psched_mtu(qdisc_dev(sch)); */ u32 drop_overlimit; /* number of times max qdisc packet * limit was hit @@ -264,7 +264,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch) } /* Get hashed flow-id of the skb. */ - hash = skb_get_hash_perturb(skb, q->perturbation); + hash = skb_get_hash_perturb(skb, &q->perturbation); /* Check if this packet belongs to an already established HH flow. 
*/ flow_pos = hash & HHF_BIT_MASK; @@ -582,7 +582,7 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt, sch->limit = 1000; q->quantum = psched_mtu(qdisc_dev(sch)); - q->perturbation = prandom_u32(); + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); INIT_LIST_HEAD(&q->new_buckets); INIT_LIST_HEAD(&q->old_buckets); diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index d448fe3068e5..4074c50ac3d7 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -45,7 +45,7 @@ struct sfb_bucket { * (Section 4.4 of SFB reference : moving hash functions) */ struct sfb_bins { - u32 perturbation; /* jhash perturbation */ + siphash_key_t perturbation; /* siphash key */ struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS]; }; @@ -217,7 +217,8 @@ static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_da static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q) { - q->bins[slot].perturbation = prandom_u32(); + get_random_bytes(&q->bins[slot].perturbation, + sizeof(q->bins[slot].perturbation)); } static void sfb_swap_slot(struct sfb_sched_data *q) @@ -314,9 +315,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* If using external classifiers, get result and record it. 
*/ if (!sfb_classify(skb, fl, &ret, &salt)) goto other_drop; - sfbhash = jhash_1word(salt, q->bins[slot].perturbation); + sfbhash = siphash_1u32(salt, &q->bins[slot].perturbation); } else { - sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation); + sfbhash = skb_get_hash_perturb(skb, &q->bins[slot].perturbation); } @@ -352,7 +353,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Inelastic flow */ if (q->double_buffering) { sfbhash = skb_get_hash_perturb(skb, - q->bins[slot].perturbation); + &q->bins[slot].perturbation); if (!sfbhash) sfbhash = 1; sfb_skb_cb(skb)->hashes[slot] = sfbhash; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 68404a9d2ce4..c787d4d46017 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -117,7 +117,7 @@ struct sfq_sched_data { u8 headdrop; u8 maxdepth; /* limit of packets per flow */ - u32 perturbation; + siphash_key_t perturbation; u8 cur_depth; /* depth of longest slot */ u8 flags; unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ @@ -157,7 +157,7 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index static unsigned int sfq_hash(const struct sfq_sched_data *q, const struct sk_buff *skb) { - return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1); + return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1); } static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, @@ -607,9 +607,11 @@ static void sfq_perturbation(struct timer_list *t) struct sfq_sched_data *q = from_timer(q, t, perturb_timer); struct Qdisc *sch = q->sch; spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + siphash_key_t nkey; + get_random_bytes(&nkey, sizeof(nkey)); spin_lock(root_lock); - q->perturbation = prandom_u32(); + q->perturbation = nkey; if (!q->filter_list && q->tail) sfq_rehash(sch); spin_unlock(root_lock); @@ -688,7 +690,7 @@ 
static int sfq_change(struct Qdisc *sch, struct nlattr *opt) del_timer(&q->perturb_timer); if (q->perturb_period) { mod_timer(&q->perturb_timer, jiffies + q->perturb_period); - q->perturbation = prandom_u32(); + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); } sch_tree_unlock(sch); kfree(p); @@ -745,7 +747,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt, q->quantum = psched_mtu(qdisc_dev(sch)); q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); q->perturb_period = 0; - q->perturbation = prandom_u32(); + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); if (opt) { int err = sfq_change(sch, opt); -- cgit v1.2.3 From 2afd23f78f39da84937006ecd24aa664a4ab052b Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Mon, 21 Oct 2019 10:16:58 +0200 Subject: xsk: Fix registration of Rx-only sockets Having Rx-only AF_XDP sockets can potentially lead to a crash in the system by a NULL pointer dereference in xsk_umem_consume_tx(). This function iterates through a list of all sockets tied to a umem and checks if there are any packets to send on the Tx ring. Rx-only sockets do not have a Tx ring, so this will cause a NULL pointer dereference. This will happen if you have registered one or more Rx-only sockets to a umem and the driver is checking the Tx ring even on Rx, or if the XDP_SHARED_UMEM mode is used and there is a mix of Rx-only and other sockets tied to the same umem. Fixed by only putting sockets with a Tx component on the list that xsk_umem_consume_tx() iterates over. 
Fixes: ac98d8aab61b ("xsk: wire upp Tx zero-copy functions") Reported-by: Kal Cutter Conley Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov Acked-by: Jonathan Lemon Link: https://lore.kernel.org/bpf/1571645818-16244-1-git-send-email-magnus.karlsson@intel.com --- net/xdp/xdp_umem.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 16d5f353163a..3049af269fbf 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -27,6 +27,9 @@ void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs) { unsigned long flags; + if (!xs->tx) + return; + spin_lock_irqsave(&umem->xsk_list_lock, flags); list_add_rcu(&xs->list, &umem->xsk_list); spin_unlock_irqrestore(&umem->xsk_list_lock, flags); @@ -36,6 +39,9 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs) { unsigned long flags; + if (!xs->tx) + return; + spin_lock_irqsave(&umem->xsk_list_lock, flags); list_del_rcu(&xs->list); spin_unlock_irqrestore(&umem->xsk_list_lock, flags); -- cgit v1.2.3 From 76db2d466f6a929a04775f0f87d837e3bcba44e8 Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Wed, 23 Oct 2019 11:44:24 +0200 Subject: net: phy: smsc: LAN8740: add PHY_RST_AFTER_CLK_EN flag The LAN8740, like the 8720, also requires a reset after enabling clock. The datasheet [1] 3.8.5.1 says: "During a Hardware reset, an external clock must be supplied to the XTAL1/CLKIN signal." I have observed this issue on a custom i.MX6 based board with the LAN8740A. [1] http://ww1.microchip.com/downloads/en/DeviceDoc/8740a.pdf Signed-off-by: Martin Fuzzey Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/phy/smsc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index dc3d92d340c4..b73298250793 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -327,6 +327,7 @@ static struct phy_driver smsc_phy_driver[] = { .name = "SMSC LAN8740", /* PHY_BASIC_FEATURES */ + .flags = PHY_RST_AFTER_CLK_EN, .probe = smsc_phy_probe, -- cgit v1.2.3 From 47db5652517684ee6c2102b944f977bf77aa9635 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 22 Oct 2019 16:06:19 +0200 Subject: reset: fix of_reset_simple_xlate kerneldoc comment The flags parameter never made it into the API, but was erroneously included in the kerneldoc comment. Remove it to fix a documentation build warning: ./drivers/reset/core.c:86: warning: Excess function parameter 'flags' description in 'of_reset_simple_xlate' Fixes: 61fc41317666 ("reset: Add reset controller API") Signed-off-by: Philipp Zabel --- drivers/reset/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/reset/core.c b/drivers/reset/core.c index 36b1ff69b1e2..1b9e71238958 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -76,7 +76,6 @@ static const char *rcdev_name(struct reset_controller_dev *rcdev) * of_reset_simple_xlate - translate reset_spec to the reset line number * @rcdev: a pointer to the reset controller device * @reset_spec: reset line specifier as found in the device tree - * @flags: a flags pointer to fill in (optional) * * This simple translation function should be used for reset controllers * with 1:1 mapping, where reset lines can be indexed by number without gaps. 
-- cgit v1.2.3 From 77d551b9ad009ad6c9cdff3894689884a64972e8 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 22 Oct 2019 16:09:50 +0200 Subject: reset: fix of_reset_control_get_count kerneldoc comment Add a newline and remove a superfluous kerneldoc marker before the of_reset_control_get_count kerneldoc comment, to fix documentation build warnings: ./drivers/reset/core.c:832: warning: Incorrect use of kernel-doc format: * of_reset_control_get_count - Count number of resets available with a device ./drivers/reset/core.c:840: warning: Function parameter or member 'node' not described in 'of_reset_control_get_count' Fixes: 17c82e206d2a ("reset: Add APIs to manage array of resets") Signed-off-by: Philipp Zabel --- drivers/reset/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/reset/core.c b/drivers/reset/core.c index 1b9e71238958..3c9a64c1b7a8 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -825,9 +825,10 @@ int __device_reset(struct device *dev, bool optional) } EXPORT_SYMBOL_GPL(__device_reset); -/** +/* * APIs to manage an array of reset controls. 
*/ + /** * of_reset_control_get_count - Count number of resets available with a device * -- cgit v1.2.3 From ed713ce242545388849e411da24cd72c15b2e8e6 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 22 Oct 2019 16:11:54 +0200 Subject: reset: fix reset_control_lookup kerneldoc comment Add a missing colon to fix a documentation build warning: ./include/linux/reset-controller.h:45: warning: Function parameter or member 'con_id' not described in 'reset_control_lookup' Fixes: 6691dffab0ab ("reset: add support for non-DT systems") Signed-off-by: Philipp Zabel --- include/linux/reset-controller.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h index 9326d671b6e6..984f625d5593 100644 --- a/include/linux/reset-controller.h +++ b/include/linux/reset-controller.h @@ -33,7 +33,7 @@ struct of_phandle_args; * @provider: name of the reset controller device controlling this reset line * @index: ID of the reset controller in the reset controller device * @dev_id: name of the device associated with this reset line - * @con_id name of the reset line (can be NULL) + * @con_id: name of the reset line (can be NULL) */ struct reset_control_lookup { struct list_head list; -- cgit v1.2.3 From b9e9348d381ea2e2a8782e753a686cf57c26ec60 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 22 Oct 2019 18:19:22 +0200 Subject: reset: fix reset_control_get_exclusive kerneldoc comment Add missing parentheses to correctly hyperlink the reference to reset_control_get_shared(). 
Fixes: 0b52297f2288 ("reset: Add support for shared reset controls") Reviewed-by: Hans de Goede Signed-off-by: Philipp Zabel --- include/linux/reset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/reset.h b/include/linux/reset.h index e7793fc0fa93..eb597e8aa430 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -143,7 +143,7 @@ static inline int device_reset_optional(struct device *dev) * If this function is called more than once for the same reset_control it will * return -EBUSY. * - * See reset_control_get_shared for details on shared references to + * See reset_control_get_shared() for details on shared references to * reset-controls. * * Use of id names is optional. -- cgit v1.2.3 From 62931f59ce9cbabb934a431f48f2f1f441c605ac Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Sat, 19 Oct 2019 17:34:35 +0200 Subject: ipvs: don't ignore errors in case refcounting ip_vs module fails If the IPVS module is removed while the sync daemon is starting, there is a small gap where try_module_get() might fail getting the refcount inside ip_vs_use_count_inc(). 
Then, the refcounts of IPVS module are unbalanced, and the subsequent call to stop_sync_thread() causes the following splat: WARNING: CPU: 0 PID: 4013 at kernel/module.c:1146 module_put.part.44+0x15b/0x290 Modules linked in: ip_vs(-) nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 veth ip6table_filter ip6_tables iptable_filter binfmt_misc intel_rapl_msr intel_rapl_common crct10dif_pclmul crc32_pclmul ext4 mbcache jbd2 ghash_clmulni_intel snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_intel_nhlt snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd glue_helper joydev pcspkr snd_timer virtio_balloon snd soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi virtio_net net_failover virtio_blk failover virtio_console qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ata_piix ttm crc32c_intel serio_raw drm virtio_pci libata virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: nf_defrag_ipv6] CPU: 0 PID: 4013 Comm: modprobe Tainted: G W 5.4.0-rc1.upstream+ #741 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:module_put.part.44+0x15b/0x290 Code: 04 25 28 00 00 00 0f 85 18 01 00 00 48 83 c4 68 5b 5d 41 5c 41 5d 41 5e 41 5f c3 89 44 24 28 83 e8 01 89 c5 0f 89 57 ff ff ff <0f> 0b e9 78 ff ff ff 65 8b 1d 67 83 26 4a 89 db be 08 00 00 00 48 RSP: 0018:ffff888050607c78 EFLAGS: 00010297 RAX: 0000000000000003 RBX: ffffffffc1420590 RCX: ffffffffb5db0ef9 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffffc1420590 RBP: 00000000ffffffff R08: fffffbfff82840b3 R09: fffffbfff82840b3 R10: 0000000000000001 R11: fffffbfff82840b2 R12: 1ffff1100a0c0f90 R13: ffffffffc1420200 R14: ffff88804f533300 R15: ffff88804f533ca0 FS: 00007f8ea9720740(0000) GS:ffff888053800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3245abe000 CR3: 000000004c28a006 CR4: 00000000001606f0 Call Trace: 
stop_sync_thread+0x3a3/0x7c0 [ip_vs] ip_vs_sync_net_cleanup+0x13/0x50 [ip_vs] ops_exit_list.isra.5+0x94/0x140 unregister_pernet_operations+0x29d/0x460 unregister_pernet_device+0x26/0x60 ip_vs_cleanup+0x11/0x38 [ip_vs] __x64_sys_delete_module+0x2d5/0x400 do_syscall_64+0xa5/0x4e0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f8ea8bf0db7 Code: 73 01 c3 48 8b 0d b9 80 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 89 80 2c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffcd38d2fe8 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000000002436240 RCX: 00007f8ea8bf0db7 RDX: 0000000000000000 RSI: 0000000000000800 RDI: 00000000024362a8 RBP: 0000000000000000 R08: 00007f8ea8eba060 R09: 00007f8ea8c658a0 R10: 00007ffcd38d2a60 R11: 0000000000000206 R12: 0000000000000000 R13: 0000000000000001 R14: 00000000024362a8 R15: 0000000000000000 irq event stamp: 4538 hardirqs last enabled at (4537): [] quarantine_put+0x9e/0x170 hardirqs last disabled at (4538): [] trace_hardirqs_off_thunk+0x1a/0x20 softirqs last enabled at (4522): [] sk_common_release+0x169/0x2d0 softirqs last disabled at (4520): [] sk_common_release+0xbe/0x2d0 Check the return value of ip_vs_use_count_inc() and let its caller return proper error. Inside do_ip_vs_set_ctl() the module is already refcounted, we don't need refcount/derefcount there. Finally, in register_ip_vs_app() and start_sync_thread(), take the module refcount earlier and ensure it's released in the error path. 
Change since v1: - better return values in case of failure of ip_vs_use_count_inc(), thanks to Julian Anastasov - no need to increase/decrease the module refcount in ip_vs_set_ctl(), thanks to Julian Anastasov Signed-off-by: Davide Caratti Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_app.c | 12 ++++++++++-- net/netfilter/ipvs/ip_vs_ctl.c | 14 ++++---------- net/netfilter/ipvs/ip_vs_pe.c | 3 ++- net/netfilter/ipvs/ip_vs_sched.c | 3 ++- net/netfilter/ipvs/ip_vs_sync.c | 13 ++++++++++--- 5 files changed, 28 insertions(+), 17 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 4515056ef1c2..f9b16f2b2219 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -193,21 +193,29 @@ struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app * mutex_lock(&__ip_vs_app_mutex); + /* increase the module use count */ + if (!ip_vs_use_count_inc()) { + err = -ENOENT; + goto out_unlock; + } + list_for_each_entry(a, &ipvs->app_list, a_list) { if (!strcmp(app->name, a->name)) { err = -EEXIST; + /* decrease the module use count */ + ip_vs_use_count_dec(); goto out_unlock; } } a = kmemdup(app, sizeof(*app), GFP_KERNEL); if (!a) { err = -ENOMEM; + /* decrease the module use count */ + ip_vs_use_count_dec(); goto out_unlock; } INIT_LIST_HEAD(&a->incs_list); list_add(&a->a_list, &ipvs->app_list); - /* increase the module use count */ - ip_vs_use_count_inc(); out_unlock: mutex_unlock(&__ip_vs_app_mutex); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 8b48e7ce1c2c..c8f81dd15c83 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1275,7 +1275,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_service *svc = NULL; /* increase the module use count */ - ip_vs_use_count_inc(); + if (!ip_vs_use_count_inc()) + return -ENOPROTOOPT; /* Lookup the scheduler 
by 'u->sched_name' */ if (strcmp(u->sched_name, "none")) { @@ -2435,9 +2436,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (copy_from_user(arg, user, len) != 0) return -EFAULT; - /* increase the module use count */ - ip_vs_use_count_inc(); - /* Handle daemons since they have another lock */ if (cmd == IP_VS_SO_SET_STARTDAEMON || cmd == IP_VS_SO_SET_STOPDAEMON) { @@ -2450,13 +2448,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) ret = -EINVAL; if (strscpy(cfg.mcast_ifn, dm->mcast_ifn, sizeof(cfg.mcast_ifn)) <= 0) - goto out_dec; + return ret; cfg.syncid = dm->syncid; ret = start_sync_thread(ipvs, &cfg, dm->state); } else { ret = stop_sync_thread(ipvs, dm->state); } - goto out_dec; + return ret; } mutex_lock(&__ip_vs_mutex); @@ -2551,10 +2549,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) out_unlock: mutex_unlock(&__ip_vs_mutex); - out_dec: - /* decrease the module use count */ - ip_vs_use_count_dec(); - return ret; } diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 8e104dff7abc..166c669f0763 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -68,7 +68,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe) struct ip_vs_pe *tmp; /* increase the module use count */ - ip_vs_use_count_inc(); + if (!ip_vs_use_count_inc()) + return -ENOENT; mutex_lock(&ip_vs_pe_mutex); /* Make sure that the pe with this name doesn't exist diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 2f9d5cd5daee..d4903723be7e 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -179,7 +179,8 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) } /* increase the module use count */ - ip_vs_use_count_inc(); + if (!ip_vs_use_count_inc()) + return -ENOENT; mutex_lock(&ip_vs_sched_mutex); diff --git a/net/netfilter/ipvs/ip_vs_sync.c 
b/net/netfilter/ipvs/ip_vs_sync.c index a4a78c4b06de..8dc892a9dc91 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1762,6 +1762,10 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n", sizeof(struct ip_vs_sync_conn_v0)); + /* increase the module use count */ + if (!ip_vs_use_count_inc()) + return -ENOPROTOOPT; + /* Do not hold one mutex and then to block on another */ for (;;) { rtnl_lock(); @@ -1892,9 +1896,6 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); - /* increase the module use count */ - ip_vs_use_count_inc(); - return 0; out: @@ -1924,11 +1925,17 @@ out: } kfree(ti); } + + /* decrease the module use count */ + ip_vs_use_count_dec(); return result; out_early: mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); + + /* decrease the module use count */ + ip_vs_use_count_dec(); return result; } -- cgit v1.2.3 From c24b75e0f9239e78105f81c5f03a751641eb07ef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Oct 2019 09:53:03 -0700 Subject: ipvs: move old_secure_tcp into struct netns_ipvs syzbot reported the following issue : BUG: KCSAN: data-race in update_defense_level / update_defense_level read to 0xffffffff861a6260 of 4 bytes by task 3006 on cpu 1: update_defense_level+0x621/0xb30 net/netfilter/ipvs/ip_vs_ctl.c:177 defense_work_handler+0x3d/0xd0 net/netfilter/ipvs/ip_vs_ctl.c:225 process_one_work+0x3d4/0x890 kernel/workqueue.c:2269 worker_thread+0xa0/0x800 kernel/workqueue.c:2415 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 write to 0xffffffff861a6260 of 4 bytes by task 7333 on cpu 0: update_defense_level+0xa62/0xb30 net/netfilter/ipvs/ip_vs_ctl.c:205 defense_work_handler+0x3d/0xd0 net/netfilter/ipvs/ip_vs_ctl.c:225 process_one_work+0x3d4/0x890 kernel/workqueue.c:2269 
worker_thread+0xa0/0x800 kernel/workqueue.c:2415 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 7333 Comm: kworker/0:5 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events defense_work_handler Indeed, old_secure_tcp is currently a static variable, while it needs to be a per netns variable. Fixes: a0840e2e165a ("IPVS: netns, ip_vs_ctl local vars moved to ipvs struct.") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Simon Horman --- include/net/ip_vs.h | 1 + net/netfilter/ipvs/ip_vs_ctl.c | 15 +++++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 3759167f91f5..078887c8c586 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -889,6 +889,7 @@ struct netns_ipvs { struct delayed_work defense_work; /* Work handler */ int drop_rate; int drop_counter; + int old_secure_tcp; atomic_t dropentry; /* locks in ctl.c */ spinlock_t dropentry_lock; /* drop entry handling */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c8f81dd15c83..3cccc88ef817 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -93,7 +93,6 @@ static bool __ip_vs_addr_is_local_v6(struct net *net, static void update_defense_level(struct netns_ipvs *ipvs) { struct sysinfo i; - static int old_secure_tcp = 0; int availmem; int nomem; int to_change = -1; @@ -174,35 +173,35 @@ static void update_defense_level(struct netns_ipvs *ipvs) spin_lock(&ipvs->securetcp_lock); switch (ipvs->sysctl_secure_tcp) { case 0: - if (old_secure_tcp >= 2) + if (ipvs->old_secure_tcp >= 2) to_change = 0; break; case 1: if (nomem) { - if (old_secure_tcp < 2) + if (ipvs->old_secure_tcp < 2) to_change = 1; ipvs->sysctl_secure_tcp = 2; } else { - if (old_secure_tcp >= 2) + if 
(ipvs->old_secure_tcp >= 2) to_change = 0; } break; case 2: if (nomem) { - if (old_secure_tcp < 2) + if (ipvs->old_secure_tcp < 2) to_change = 1; } else { - if (old_secure_tcp >= 2) + if (ipvs->old_secure_tcp >= 2) to_change = 0; ipvs->sysctl_secure_tcp = 1; } break; case 3: - if (old_secure_tcp < 2) + if (ipvs->old_secure_tcp < 2) to_change = 1; break; } - old_secure_tcp = ipvs->sysctl_secure_tcp; + ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp; if (to_change >= 0) ip_vs_protocol_timeout_change(ipvs, ipvs->sysctl_secure_tcp > 1); -- cgit v1.2.3 From a69a85da458f79088c38a38db034a4d64d9c32c3 Mon Sep 17 00:00:00 2001 From: wenxu Date: Thu, 24 Oct 2019 15:52:45 +0800 Subject: netfilter: nft_payload: fix missing check for matching length in offloads Payload offload rule should also check the length of the match. Moreover, check for unsupported link-layer fields: nft --debug=netlink add rule firewall zones vlan id 100 ... [ payload load 2b @ link header + 0 => reg 1 ] This loads 2 bytes based on the link-layer header at offset 0. This also fixes unsupported raw payload match. 
Fixes: 92ad6325cb89 ("netfilter: nf_tables: add hardware offload support") Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_payload.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 22a80eb60222..5cb2d8908d2a 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -161,13 +161,21 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct ethhdr, h_source): + if (priv->len != ETH_ALEN) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs, src, ETH_ALEN, reg); break; case offsetof(struct ethhdr, h_dest): + if (priv->len != ETH_ALEN) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs, dst, ETH_ALEN, reg); break; + default: + return -EOPNOTSUPP; } return 0; @@ -181,14 +189,23 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct iphdr, saddr): + if (priv->len != sizeof(struct in_addr)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, src, sizeof(struct in_addr), reg); break; case offsetof(struct iphdr, daddr): + if (priv->len != sizeof(struct in_addr)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, dst, sizeof(struct in_addr), reg); break; case offsetof(struct iphdr, protocol): + if (priv->len != sizeof(__u8)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, sizeof(__u8), reg); nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT); @@ -208,14 +225,23 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct ipv6hdr, saddr): + if (priv->len != sizeof(struct in6_addr)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, src, sizeof(struct in6_addr), reg); break; case 
offsetof(struct ipv6hdr, daddr): + if (priv->len != sizeof(struct in6_addr)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, dst, sizeof(struct in6_addr), reg); break; case offsetof(struct ipv6hdr, nexthdr): + if (priv->len != sizeof(__u8)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, sizeof(__u8), reg); nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT); @@ -255,10 +281,16 @@ static int nft_payload_offload_tcp(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct tcphdr, source): + if (priv->len != sizeof(__be16)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src, sizeof(__be16), reg); break; case offsetof(struct tcphdr, dest): + if (priv->len != sizeof(__be16)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst, sizeof(__be16), reg); break; @@ -277,10 +309,16 @@ static int nft_payload_offload_udp(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct udphdr, source): + if (priv->len != sizeof(__be16)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src, sizeof(__be16), reg); break; case offsetof(struct udphdr, dest): + if (priv->len != sizeof(__be16)) + return -EOPNOTSUPP; + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst, sizeof(__be16), reg); break; -- cgit v1.2.3 From e2995b95a914bbc6b5352be27d5d5f33ec802d2c Mon Sep 17 00:00:00 2001 From: Justin Song Date: Thu, 24 Oct 2019 12:27:14 +0200 Subject: ALSA: usb-audio: Add DSD support for Gustard U16/X26 USB Interface This patch adds native DSD support for Gustard U16/X26 USB Interface. Tested using VID and fp->dsd_raw method. 
Signed-off-by: Justin Song Cc: Link: https://lore.kernel.org/r/CA+9XP1ipsFn+r3bCBKRinQv-JrJ+EHOGBdZWZoMwxFv0R8Y1MQ@mail.gmail.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index fbfde996fee7..0bbe1201a6ac 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1657,6 +1657,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case 0x23ba: /* Playback Designs */ case 0x25ce: /* Mytek devices */ case 0x278b: /* Rotel? */ + case 0x292b: /* Gustard/Ess based devices */ case 0x2ab6: /* T+A devices */ case 0x3842: /* EVGA */ case 0xc502: /* HiBy devices */ -- cgit v1.2.3 From f0778871a13889b86a65d4ad34bef8340af9d082 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 24 Oct 2019 15:13:32 +0800 Subject: ALSA: hda/realtek - Add support for ALC623 Support new codec ALC623. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/ed97b6a8bd9445ecb48bc763d9aaba7a@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 085a2f95e076..a0c237cc13d4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -409,6 +409,9 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0672: alc_update_coef_idx(codec, 0xd, 0, 1<<14); /* EAPD Ctrl */ break; + case 0x10ec0623: + alc_update_coef_idx(codec, 0x19, 1<<13, 0); + break; case 0x10ec0668: alc_update_coef_idx(codec, 0x7, 3<<13, 0); break; @@ -2920,6 +2923,7 @@ enum { ALC269_TYPE_ALC225, ALC269_TYPE_ALC294, ALC269_TYPE_ALC300, + ALC269_TYPE_ALC623, ALC269_TYPE_ALC700, }; @@ -2955,6 +2959,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec) case ALC269_TYPE_ALC225: case ALC269_TYPE_ALC294: case ALC269_TYPE_ALC300: + case ALC269_TYPE_ALC623: case ALC269_TYPE_ALC700: ssids = alc269_ssids; break; @@ -8017,6 +8022,9 @@ 
static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC300; spec->gen.mixer_nid = 0; /* no loopback on ALC300 */ break; + case 0x10ec0623: + spec->codec_variant = ALC269_TYPE_ALC623; + break; case 0x10ec0700: case 0x10ec0701: case 0x10ec0703: @@ -9218,6 +9226,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0298, "ALC298", patch_alc269), HDA_CODEC_ENTRY(0x10ec0299, "ALC299", patch_alc269), HDA_CODEC_ENTRY(0x10ec0300, "ALC300", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0623, "ALC623", patch_alc269), HDA_CODEC_REV_ENTRY(0x10ec0861, 0x100340, "ALC660", patch_alc861), HDA_CODEC_ENTRY(0x10ec0660, "ALC660-VD", patch_alc861vd), HDA_CODEC_ENTRY(0x10ec0861, "ALC861", patch_alc861), -- cgit v1.2.3 From 7b47ad3322d114736e08cc244a84fe7af9b861f6 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 22 Oct 2019 18:31:47 -0500 Subject: soundwire: slave: fix scanf format fix cppcheck warning: [drivers/soundwire/slave.c:145]: (warning) %x in format string (no. 1) requires 'unsigned int *' but the argument type is 'signed int *'. 
Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191022233147.17268-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/slave.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/soundwire/slave.c b/drivers/soundwire/slave.c index 48a63ca130d2..6473fa602f82 100644 --- a/drivers/soundwire/slave.c +++ b/drivers/soundwire/slave.c @@ -128,7 +128,8 @@ int sdw_of_find_slaves(struct sdw_bus *bus) struct device_node *node; for_each_child_of_node(bus->dev->of_node, node) { - int link_id, sdw_version, ret, len; + int link_id, ret, len; + unsigned int sdw_version; const char *compat = NULL; struct sdw_slave_id id; const __be32 *addr; -- cgit v1.2.3 From 8a6c55d0f883e9a7e7c91841434f3b6bbf932bb2 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Thu, 24 Oct 2019 19:44:39 +0800 Subject: ALSA: hda/realtek - Fix 2 front mics of codec 0x623 These 2 ThinkCentres installed a new realtek codec ID 0x623, it has 2 front mics with the same location on pin 0x18 and 0x19. Apply fixup ALC283_FIXUP_HEADSET_MIC to change 1 front mic location to right, then pulseaudio can handle them. One "Front Mic" and one "Mic" will be shown, and audio output works fine. 
Signed-off-by: Aaron Ma Cc: Link: https://lore.kernel.org/r/20191024114439.31522-1-aaron.ma@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a0c237cc13d4..80f66ba85f87 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7221,6 +7221,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x3151, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI), -- cgit v1.2.3 From a15542bb72a48042f5df7475893d46f725f5f9fb Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 20 Oct 2019 09:28:00 +0300 Subject: RDMA/nldev: Skip counter if port doesn't match The counter resource should return -EAGAIN if it was requested for a different port; this is similar to how QP works if the user provides a port filter. Otherwise port filtering in netlink will return broken counter nests. 
Fixes: c4ffee7c9bdb ("RDMA/netlink: Implement counter dumpit calback") Link: https://lore.kernel.org/r/20191020062800.8065-1-leon@kernel.org Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/nldev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 65b36548bc17..c03af08b80e7 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -778,7 +778,7 @@ static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin, container_of(res, struct rdma_counter, res); if (port && port != counter->port) - return 0; + return -EAGAIN; /* Dump it even query failed */ rdma_counter_query_stats(counter); -- cgit v1.2.3 From 82ecff655e7968151b0047f1b5de03b249e5c1c4 Mon Sep 17 00:00:00 2001 From: Takeshi Misawa Date: Sat, 19 Oct 2019 15:34:43 +0900 Subject: keys: Fix memory leak in copy_net_ns If copy_net_ns() failed after net_alloc(), net->key_domain is leaked. Fix this, by freeing key_domain in error path. syzbot report: BUG: memory leak unreferenced object 0xffff8881175007e0 (size 32): comm "syz-executor902", pid 7069, jiffies 4294944350 (age 28.400s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
backtrace: [<00000000a83ed741>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<00000000a83ed741>] slab_post_alloc_hook mm/slab.h:439 [inline] [<00000000a83ed741>] slab_alloc mm/slab.c:3326 [inline] [<00000000a83ed741>] kmem_cache_alloc_trace+0x13d/0x280 mm/slab.c:3553 [<0000000059fc92b9>] kmalloc include/linux/slab.h:547 [inline] [<0000000059fc92b9>] kzalloc include/linux/slab.h:742 [inline] [<0000000059fc92b9>] net_alloc net/core/net_namespace.c:398 [inline] [<0000000059fc92b9>] copy_net_ns+0xb2/0x220 net/core/net_namespace.c:445 [<00000000a9d74bbc>] create_new_namespaces+0x141/0x2a0 kernel/nsproxy.c:103 [<000000008047d645>] unshare_nsproxy_namespaces+0x7f/0x100 kernel/nsproxy.c:202 [<000000005993ea6e>] ksys_unshare+0x236/0x490 kernel/fork.c:2674 [<0000000019417e75>] __do_sys_unshare kernel/fork.c:2742 [inline] [<0000000019417e75>] __se_sys_unshare kernel/fork.c:2740 [inline] [<0000000019417e75>] __x64_sys_unshare+0x16/0x20 kernel/fork.c:2740 [<00000000f4c5f2c8>] do_syscall_64+0x76/0x1a0 arch/x86/entry/common.c:296 [<0000000038550184>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 syzbot also reported other leak in copy_net_ns -> setup_net. This problem is already fixed by cf47a0b882a4e5f6b34c7949d7b293e9287f1972. Fixes: 9b242610514f ("keys: Network namespace domain tag") Reported-and-tested-by: syzbot+3b3296d032353c33184b@syzkaller.appspotmail.com Signed-off-by: Takeshi Misawa Signed-off-by: David S. 
Miller --- net/core/net_namespace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6d3e4821b02d..5a4ae0845bac 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -479,6 +479,7 @@ struct net *copy_net_ns(unsigned long flags, if (rv < 0) { put_userns: + key_remove_domain(net->key_domain); put_user_ns(user_ns); net_drop_ns(net); dec_ucounts: -- cgit v1.2.3 From 5343da4c17429efaa5fb1594ea96aee1a283e694 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:50 +0000 Subject: net: core: limit nested device depth Current code doesn't limit the number of nested devices. Nested devices would be handled recursively and this needs huge stack memory. So, unlimited nested devices could cause a stack overflow. This patch adds upper_level and lower_level, they are common variables and represent maximum lower/upper depth. When upper/lower device is attached or detached, {lower/upper}_level are updated, and if maximum depth is bigger than 8, attach routine fails and returns -EMLINK. In addition, this patch converts recursive routine of netdev_walk_all_{lower/upper} to iterator routine. 
Test commands: ip link add dummy0 type dummy ip link add link dummy0 name vlan1 type vlan id 1 ip link set vlan1 up for i in {2..55} do let A=$i-1 ip link add vlan$i link vlan$A type vlan id $i done ip link del dummy0 Splat looks like: [ 155.513226][ T908] BUG: KASAN: use-after-free in __unwind_start+0x71/0x850 [ 155.514162][ T908] Write of size 88 at addr ffff8880608a6cc0 by task ip/908 [ 155.515048][ T908] [ 155.515333][ T908] CPU: 0 PID: 908 Comm: ip Not tainted 5.4.0-rc3+ #96 [ 155.516147][ T908] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 155.517233][ T908] Call Trace: [ 155.517627][ T908] [ 155.517918][ T908] Allocated by task 0: [ 155.518412][ T908] (stack is not available) [ 155.518955][ T908] [ 155.519228][ T908] Freed by task 0: [ 155.519885][ T908] (stack is not available) [ 155.520452][ T908] [ 155.520729][ T908] The buggy address belongs to the object at ffff8880608a6ac0 [ 155.520729][ T908] which belongs to the cache names_cache of size 4096 [ 155.522387][ T908] The buggy address is located 512 bytes inside of [ 155.522387][ T908] 4096-byte region [ffff8880608a6ac0, ffff8880608a7ac0) [ 155.523920][ T908] The buggy address belongs to the page: [ 155.524552][ T908] page:ffffea0001822800 refcount:1 mapcount:0 mapping:ffff88806c657cc0 index:0x0 compound_mapcount:0 [ 155.525836][ T908] flags: 0x100000000010200(slab|head) [ 155.526445][ T908] raw: 0100000000010200 ffffea0001813808 ffffea0001a26c08 ffff88806c657cc0 [ 155.527424][ T908] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 155.528429][ T908] page dumped because: kasan: bad access detected [ 155.529158][ T908] [ 155.529410][ T908] Memory state around the buggy address: [ 155.530060][ T908] ffff8880608a6b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 155.530971][ T908] ffff8880608a6c00: fb fb fb fb fb f1 f1 f1 f1 00 f2 f2 f2 f3 f3 f3 [ 155.531889][ T908] >ffff8880608a6c80: f3 fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 
155.532806][ T908] ^ [ 155.533509][ T908] ffff8880608a6d00: fb fb fb fb fb fb fb fb fb f1 f1 f1 f1 00 00 00 [ 155.534436][ T908] ffff8880608a6d80: f2 f3 f3 f3 f3 fb fb fb 00 00 00 00 00 00 00 00 [ ... ] Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 + net/core/dev.c | 272 ++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 231 insertions(+), 45 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9eda1c31d1f7..38c5909e1c35 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1649,6 +1649,8 @@ enum netdev_priv_flags { * @perm_addr: Permanent hw address * @addr_assign_type: Hw address assignment type * @addr_len: Hardware address length + * @upper_level: Maximum depth level of upper devices. + * @lower_level: Maximum depth level of lower devices. * @neigh_priv_len: Used in neigh_alloc() * @dev_id: Used to differentiate devices that share * the same link layer address @@ -1875,6 +1877,8 @@ struct net_device { unsigned char perm_addr[MAX_ADDR_LEN]; unsigned char addr_assign_type; unsigned char addr_len; + unsigned char upper_level; + unsigned char lower_level; unsigned short neigh_priv_len; unsigned short dev_id; unsigned short dev_port; diff --git a/net/core/dev.c b/net/core/dev.c index bf3ed413abaf..ab0edfc4a422 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -146,6 +146,7 @@ #include "net-sysfs.h" #define MAX_GRO_SKBS 8 +#define MAX_NEST_DEV 8 /* This should be increased if a protocol with a bigger head is added. 
*/ #define GRO_MAX_HEAD (MAX_HEADER + 128) @@ -6644,6 +6645,21 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); +static struct net_device *netdev_next_upper_dev(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *upper; + + upper = list_entry((*iter)->next, struct netdev_adjacent, list); + + if (&upper->list == &dev->adj_list.upper) + return NULL; + + *iter = &upper->list; + + return upper->dev; +} + static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, struct list_head **iter) { @@ -6661,28 +6677,93 @@ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, return upper->dev; } +static int netdev_walk_all_upper_dev(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int ret, cur = 0; + + now = dev; + iter = &dev->adj_list.upper; + + while (1) { + if (now != dev) { + ret = fn(now, data); + if (ret) + return ret; + } + + next = NULL; + while (1) { + udev = netdev_next_upper_dev(now, &iter); + if (!udev) + break; + + next = udev; + niter = &udev->adj_list.upper; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + break; + } + + if (!next) { + if (!cur) + return 0; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; + } + + return 0; +} + int netdev_walk_all_upper_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *dev, void *data), void *data) { - struct net_device *udev; - struct list_head *iter; - int ret; + struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int ret, cur = 0; - for (iter = &dev->adj_list.upper, - udev = netdev_next_upper_dev_rcu(dev, &iter); - udev; - udev = netdev_next_upper_dev_rcu(dev, &iter)) { - 
/* first is the upper device itself */ - ret = fn(udev, data); - if (ret) - return ret; + now = dev; + iter = &dev->adj_list.upper; - /* then look at all of its upper devices */ - ret = netdev_walk_all_upper_dev_rcu(udev, fn, data); - if (ret) - return ret; + while (1) { + if (now != dev) { + ret = fn(now, data); + if (ret) + return ret; + } + + next = NULL; + while (1) { + udev = netdev_next_upper_dev_rcu(now, &iter); + if (!udev) + break; + + next = udev; + niter = &udev->adj_list.upper; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + break; + } + + if (!next) { + if (!cur) + return 0; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; } return 0; @@ -6790,23 +6871,42 @@ int netdev_walk_all_lower_dev(struct net_device *dev, void *data), void *data) { - struct net_device *ldev; - struct list_head *iter; - int ret; + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int ret, cur = 0; - for (iter = &dev->adj_list.lower, - ldev = netdev_next_lower_dev(dev, &iter); - ldev; - ldev = netdev_next_lower_dev(dev, &iter)) { - /* first is the lower device itself */ - ret = fn(ldev, data); - if (ret) - return ret; + now = dev; + iter = &dev->adj_list.lower; - /* then look at all of its lower devices */ - ret = netdev_walk_all_lower_dev(ldev, fn, data); - if (ret) - return ret; + while (1) { + if (now != dev) { + ret = fn(now, data); + if (ret) + return ret; + } + + next = NULL; + while (1) { + ldev = netdev_next_lower_dev(now, &iter); + if (!ldev) + break; + + next = ldev; + niter = &ldev->adj_list.lower; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + break; + } + + if (!next) { + if (!cur) + return 0; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; } return 0; @@ -6827,28 +6927,93 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, return lower->dev; } -int 
netdev_walk_all_lower_dev_rcu(struct net_device *dev, - int (*fn)(struct net_device *dev, - void *data), - void *data) +static u8 __netdev_upper_depth(struct net_device *dev) +{ + struct net_device *udev; + struct list_head *iter; + u8 max_depth = 0; + + for (iter = &dev->adj_list.upper, + udev = netdev_next_upper_dev(dev, &iter); + udev; + udev = netdev_next_upper_dev(dev, &iter)) { + if (max_depth < udev->upper_level) + max_depth = udev->upper_level; + } + + return max_depth; +} + +static u8 __netdev_lower_depth(struct net_device *dev) { struct net_device *ldev; struct list_head *iter; - int ret; + u8 max_depth = 0; for (iter = &dev->adj_list.lower, - ldev = netdev_next_lower_dev_rcu(dev, &iter); + ldev = netdev_next_lower_dev(dev, &iter); ldev; - ldev = netdev_next_lower_dev_rcu(dev, &iter)) { - /* first is the lower device itself */ - ret = fn(ldev, data); - if (ret) - return ret; + ldev = netdev_next_lower_dev(dev, &iter)) { + if (max_depth < ldev->lower_level) + max_depth = ldev->lower_level; + } - /* then look at all of its lower devices */ - ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data); - if (ret) - return ret; + return max_depth; +} + +static int __netdev_update_upper_level(struct net_device *dev, void *data) +{ + dev->upper_level = __netdev_upper_depth(dev) + 1; + return 0; +} + +static int __netdev_update_lower_level(struct net_device *dev, void *data) +{ + dev->lower_level = __netdev_lower_depth(dev) + 1; + return 0; +} + +int netdev_walk_all_lower_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int ret, cur = 0; + + now = dev; + iter = &dev->adj_list.lower; + + while (1) { + if (now != dev) { + ret = fn(now, data); + if (ret) + return ret; + } + + next = NULL; + while (1) { + ldev = netdev_next_lower_dev_rcu(now, &iter); + if (!ldev) + break; + + next = ldev; 
+ niter = &ldev->adj_list.lower; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + break; + } + + if (!next) { + if (!cur) + return 0; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; } return 0; @@ -7105,6 +7270,9 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (netdev_has_upper_dev(upper_dev, dev)) return -EBUSY; + if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV) + return -EMLINK; + if (!master) { if (netdev_has_upper_dev(dev, upper_dev)) return -EEXIST; @@ -7131,6 +7299,12 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (ret) goto rollback; + __netdev_update_upper_level(dev, NULL); + netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); + + __netdev_update_lower_level(upper_dev, NULL); + netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL); + return 0; rollback: @@ -7213,6 +7387,12 @@ void netdev_upper_dev_unlink(struct net_device *dev, call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); + + __netdev_update_upper_level(dev, NULL); + netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); + + __netdev_update_lower_level(upper_dev, NULL); + netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL); } EXPORT_SYMBOL(netdev_upper_dev_unlink); @@ -9212,6 +9392,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->gso_max_size = GSO_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; + dev->upper_level = 1; + dev->lower_level = 1; INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); -- cgit v1.2.3 From ab92d68fc22f9afab480153bd82a20f6e2533769 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:51 +0000 Subject: net: core: add generic lockdep keys Some interface types could be nested. (VLAN, BONDING, TEAM, MACSEC, MACVLAN, IPVLAN, VIRT_WIFI, VXLAN, etc..) 
These interface types should set a lockdep class because, without a lockdep class key, lockdep always warns about nonexistent circular locking. In the current code, these interfaces have their own lockdep class keys and manage them themselves, so there is a lot of duplicated code around /drivers/net and /net/. This patch adds new generic lockdep keys and some helper functions for them. This patch makes the following changes. a) Add lockdep class keys in struct net_device - qdisc_running, xmit, addr_list, qdisc_busylock - these keys are used as dynamic lockdep keys. b) When a net_device is being allocated, lockdep keys are registered. - alloc_netdev_mqs() c) When a net_device is being freed, lockdep keys are unregistered. - free_netdev() d) Add generic lockdep key helper functions - netdev_register_lockdep_key() - netdev_unregister_lockdep_key() - netdev_update_lockdep_key() e) Remove unnecessary generic lockdep macros and functions f) Remove unnecessary lockdep code from each interface. After this patch, interface modules don't need to maintain their own lockdep keys. Signed-off-by: Taehee Yoo Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 1 - drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 18 --- drivers/net/hamradio/bpqether.c | 22 ---- drivers/net/hyperv/netvsc_drv.c | 2 - drivers/net/ipvlan/ipvlan_main.c | 2 - drivers/net/macsec.c | 5 - drivers/net/macvlan.c | 12 -- drivers/net/ppp/ppp_generic.c | 2 - drivers/net/team/team.c | 2 - drivers/net/vrf.c | 1 - drivers/net/wireless/intersil/hostap/hostap_hw.c | 25 ----- include/linux/netdevice.h | 35 +++--- net/8021q/vlan_dev.c | 27 ----- net/batman-adv/soft-interface.c | 32 ------ net/bluetooth/6lowpan.c | 8 -- net/bridge/br_device.c | 8 -- net/core/dev.c | 127 ++++++++-------------- net/core/rtnetlink.c | 1 + net/dsa/master.c | 5 - net/dsa/slave.c | 12 -- net/ieee802154/6lowpan/core.c | 8 -- net/l2tp/l2tp_eth.c | 1 - net/netrom/af_netrom.c | 23 ---- net/rose/af_rose.c | 23 ---- net/sched/sch_generic.c | 17 +-- 25 files changed, 63 insertions(+), 356 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 21d8fcc83c9c..ac1b09b56c77 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4769,7 +4769,6 @@ static int bond_init(struct net_device *bond_dev) return -ENOMEM; bond->nest_level = SINGLE_DEPTH_NESTING; - netdev_lockdep_set_classes(bond_dev); list_add_tail(&bond->bond_list, &bn->dev_list); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index 1eef446036d6..79d72c88bbef 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -299,22 +299,6 @@ static void nfp_repr_clean(struct nfp_repr *repr) nfp_port_free(repr->port); } -static struct lock_class_key nfp_repr_netdev_xmit_lock_key; -static struct lock_class_key nfp_repr_netdev_addr_lock_key; - -static void nfp_repr_set_lockdep_class_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, 
&nfp_repr_netdev_xmit_lock_key); -} - -static void nfp_repr_set_lockdep_class(struct net_device *dev) -{ - lockdep_set_class(&dev->addr_list_lock, &nfp_repr_netdev_addr_lock_key); - netdev_for_each_tx_queue(dev, nfp_repr_set_lockdep_class_one, NULL); -} - int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, u32 cmsg_port_id, struct nfp_port *port, struct net_device *pf_netdev) @@ -324,8 +308,6 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, u32 repr_cap = nn->tlv_caps.repr_cap; int err; - nfp_repr_set_lockdep_class(netdev); - repr->port = port; repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, GFP_KERNEL); if (!repr->dst) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index fbec711ff514..fbea6f232819 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -107,27 +107,6 @@ struct bpqdev { static LIST_HEAD(bpq_devices); -/* - * bpqether network devices are paired with ethernet devices below them, so - * form a special "super class" of normal ethernet devices; split their locks - * off into a separate class since they always nest. 
- */ -static struct lock_class_key bpq_netdev_xmit_lock_key; -static struct lock_class_key bpq_netdev_addr_lock_key; - -static void bpq_set_lockdep_class_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, &bpq_netdev_xmit_lock_key); -} - -static void bpq_set_lockdep_class(struct net_device *dev) -{ - lockdep_set_class(&dev->addr_list_lock, &bpq_netdev_addr_lock_key); - netdev_for_each_tx_queue(dev, bpq_set_lockdep_class_one, NULL); -} - /* ------------------------------------------------------------------------ */ @@ -498,7 +477,6 @@ static int bpq_new_device(struct net_device *edev) err = register_netdevice(ndev); if (err) goto error; - bpq_set_lockdep_class(ndev); /* List protected by RTNL */ list_add_rcu(&bpq->bpq_list, &bpq_devices); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 39dddcd8b3cb..fd4fff57fd6e 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2335,8 +2335,6 @@ static int netvsc_probe(struct hv_device *dev, NETIF_F_HW_VLAN_CTAG_RX; net->vlan_features = net->features; - netdev_lockdep_set_classes(net); - /* MTU range: 68 - 1500 or 65521 */ net->min_mtu = NETVSC_MTU_MIN; if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 887bbba4631e..ba3dfac1d904 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -131,8 +131,6 @@ static int ipvlan_init(struct net_device *dev) dev->gso_max_segs = phy_dev->gso_max_segs; dev->hard_header_len = phy_dev->hard_header_len; - netdev_lockdep_set_classes(dev); - ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats); if (!ipvlan->pcpu_stats) return -ENOMEM; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index cb7637364b40..e2a3d1d5795f 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2750,7 +2750,6 @@ static netdev_tx_t 
macsec_start_xmit(struct sk_buff *skb, #define MACSEC_FEATURES \ (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST) -static struct lock_class_key macsec_netdev_addr_lock_key; static int macsec_dev_init(struct net_device *dev) { @@ -3264,10 +3263,6 @@ static int macsec_newlink(struct net *net, struct net_device *dev, dev_hold(real_dev); macsec->nest_level = dev_get_nest_level(real_dev) + 1; - netdev_lockdep_set_classes(dev); - lockdep_set_class_and_subclass(&dev->addr_list_lock, - &macsec_netdev_addr_lock_key, - macsec_get_nest_level(dev)); err = netdev_upper_dev_link(real_dev, dev, extack); if (err < 0) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 940192c057b6..0354e9be2ca5 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -852,8 +852,6 @@ static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) * "super class" of normal network devices; split their locks off into a * separate class since they always nest. */ -static struct lock_class_key macvlan_netdev_addr_lock_key; - #define ALWAYS_ON_OFFLOADS \ (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | \ NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL) @@ -874,14 +872,6 @@ static int macvlan_get_nest_level(struct net_device *dev) return ((struct macvlan_dev *)netdev_priv(dev))->nest_level; } -static void macvlan_set_lockdep_class(struct net_device *dev) -{ - netdev_lockdep_set_classes(dev); - lockdep_set_class_and_subclass(&dev->addr_list_lock, - &macvlan_netdev_addr_lock_key, - macvlan_get_nest_level(dev)); -} - static int macvlan_init(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); @@ -900,8 +890,6 @@ static int macvlan_init(struct net_device *dev) dev->gso_max_segs = lowerdev->gso_max_segs; dev->hard_header_len = lowerdev->hard_header_len; - macvlan_set_lockdep_class(dev); - vlan->pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan->pcpu_stats) return -ENOMEM; diff --git a/drivers/net/ppp/ppp_generic.c 
b/drivers/net/ppp/ppp_generic.c index 9a1b006904a7..61824bbb5588 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1324,8 +1324,6 @@ static int ppp_dev_init(struct net_device *dev) { struct ppp *ppp; - netdev_lockdep_set_classes(dev); - ppp = netdev_priv(dev); /* Let the netdevice take a reference on the ppp file. This ensures * that ppp_destroy_interface() won't run before the device gets diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index e8089def5a46..6cea83b48cad 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1642,8 +1642,6 @@ static int team_init(struct net_device *dev) goto err_options_register; netif_carrier_off(dev); - netdev_lockdep_set_classes(dev); - return 0; err_options_register: diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index ee52bde058df..b8228f50bc94 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -865,7 +865,6 @@ static int vrf_dev_init(struct net_device *dev) /* similarly, oper state is irrelevant; set to up to avoid confusion */ dev->operstate = IF_OPER_UP; - netdev_lockdep_set_classes(dev); return 0; out_rth: diff --git a/drivers/net/wireless/intersil/hostap/hostap_hw.c b/drivers/net/wireless/intersil/hostap/hostap_hw.c index 158a3d762e55..e323e9a5999f 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_hw.c +++ b/drivers/net/wireless/intersil/hostap/hostap_hw.c @@ -3041,30 +3041,6 @@ static void prism2_clear_set_tim_queue(local_info_t *local) } } - -/* - * HostAP uses two layers of net devices, where the inner - * layer gets called all the time from the outer layer. - * This is a natural nesting, which needs a split lock type. 
- */ -static struct lock_class_key hostap_netdev_xmit_lock_key; -static struct lock_class_key hostap_netdev_addr_lock_key; - -static void prism2_set_lockdep_class_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, - &hostap_netdev_xmit_lock_key); -} - -static void prism2_set_lockdep_class(struct net_device *dev) -{ - lockdep_set_class(&dev->addr_list_lock, - &hostap_netdev_addr_lock_key); - netdev_for_each_tx_queue(dev, prism2_set_lockdep_class_one, NULL); -} - static struct net_device * prism2_init_local_data(struct prism2_helper_functions *funcs, int card_idx, struct device *sdev) @@ -3223,7 +3199,6 @@ while (0) if (ret >= 0) ret = register_netdevice(dev); - prism2_set_lockdep_class(dev); rtnl_unlock(); if (ret < 0) { printk(KERN_WARNING "%s: register netdevice failed!\n", diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 38c5909e1c35..c93df7cf187b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -925,6 +925,7 @@ struct dev_ifalias { struct devlink; struct tlsdev_ops; + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1760,9 +1761,13 @@ enum netdev_priv_flags { * @phydev: Physical device may attach itself * for hardware timestamping * @sfp_bus: attached &struct sfp_bus structure. 
- * - * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock - * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount + * @qdisc_tx_busylock_key: lockdep class annotating Qdisc->busylock + spinlock + * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount + * @qdisc_xmit_lock_key: lockdep class annotating + * netdev_queue->_xmit_lock spinlock + * @addr_list_lock_key: lockdep class annotating + * net_device->addr_list_lock spinlock * * @proto_down: protocol port state information can be sent to the * switch driver and used to set the phys state of the @@ -2049,8 +2054,10 @@ struct net_device { #endif struct phy_device *phydev; struct sfp_bus *sfp_bus; - struct lock_class_key *qdisc_tx_busylock; - struct lock_class_key *qdisc_running_key; + struct lock_class_key qdisc_tx_busylock_key; + struct lock_class_key qdisc_running_key; + struct lock_class_key qdisc_xmit_lock_key; + struct lock_class_key addr_list_lock_key; bool proto_down; unsigned wol_enabled:1; }; @@ -2128,23 +2135,6 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, f(dev, &dev->_tx[i], arg); } -#define netdev_lockdep_set_classes(dev) \ -{ \ - static struct lock_class_key qdisc_tx_busylock_key; \ - static struct lock_class_key qdisc_running_key; \ - static struct lock_class_key qdisc_xmit_lock_key; \ - static struct lock_class_key dev_addr_list_lock_key; \ - unsigned int i; \ - \ - (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ - (dev)->qdisc_running_key = &qdisc_running_key; \ - lockdep_set_class(&(dev)->addr_list_lock, \ - &dev_addr_list_lock_key); \ - for (i = 0; i < (dev)->num_tx_queues; i++) \ - lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \ - &qdisc_xmit_lock_key); \ -} - u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, @@ -3143,6 +3133,7 @@ static inline void netif_stop_queue(struct net_device *dev) } void 
netif_tx_stop_all_queues(struct net_device *dev); +void netdev_update_lockdep_key(struct net_device *dev); static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue) { diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 93eadf179123..6e6f26bf6e73 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -489,31 +489,6 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); } -/* - * vlan network devices have devices nesting below it, and are a special - * "super class" of normal network devices; split their locks off into a - * separate class since they always nest. - */ -static struct lock_class_key vlan_netdev_xmit_lock_key; -static struct lock_class_key vlan_netdev_addr_lock_key; - -static void vlan_dev_set_lockdep_one(struct net_device *dev, - struct netdev_queue *txq, - void *_subclass) -{ - lockdep_set_class_and_subclass(&txq->_xmit_lock, - &vlan_netdev_xmit_lock_key, - *(int *)_subclass); -} - -static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass) -{ - lockdep_set_class_and_subclass(&dev->addr_list_lock, - &vlan_netdev_addr_lock_key, - subclass); - netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass); -} - static int vlan_dev_get_lock_subclass(struct net_device *dev) { return vlan_dev_priv(dev)->nest_level; @@ -609,8 +584,6 @@ static int vlan_dev_init(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &vlan_type); - vlan_dev_set_lockdep_class(dev, vlan_dev_get_lock_subclass(dev)); - vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan->vlan_pcpu_stats) return -ENOMEM; diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 9cbed6f5a85a..5ee8e9a100f9 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -740,36 +740,6 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, return 0; } -/* batman-adv network 
devices have devices nesting below it and are a special - * "super class" of normal network devices; split their locks off into a - * separate class since they always nest. - */ -static struct lock_class_key batadv_netdev_xmit_lock_key; -static struct lock_class_key batadv_netdev_addr_lock_key; - -/** - * batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue - * @dev: device which owns the tx queue - * @txq: tx queue to modify - * @_unused: always NULL - */ -static void batadv_set_lockdep_class_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, &batadv_netdev_xmit_lock_key); -} - -/** - * batadv_set_lockdep_class() - Set txq and addr_list lockdep class - * @dev: network device to modify - */ -static void batadv_set_lockdep_class(struct net_device *dev) -{ - lockdep_set_class(&dev->addr_list_lock, &batadv_netdev_addr_lock_key); - netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL); -} - /** * batadv_softif_init_late() - late stage initialization of soft interface * @dev: registered network device to modify @@ -783,8 +753,6 @@ static int batadv_softif_init_late(struct net_device *dev) int ret; size_t cnt_len = sizeof(u64) * BATADV_CNT_NUM; - batadv_set_lockdep_class(dev); - bat_priv = netdev_priv(dev); bat_priv->soft_iface = dev; diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index bb55d92691b0..4febc82a7c76 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -571,15 +571,7 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev) return err < 0 ? 
NET_XMIT_DROP : err; } -static int bt_dev_init(struct net_device *dev) -{ - netdev_lockdep_set_classes(dev); - - return 0; -} - static const struct net_device_ops netdev_ops = { - .ndo_init = bt_dev_init, .ndo_start_xmit = bt_xmit, }; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 681b72862c16..e804a3016902 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -24,8 +24,6 @@ const struct nf_br_ops __rcu *nf_br_ops __read_mostly; EXPORT_SYMBOL_GPL(nf_br_ops); -static struct lock_class_key bridge_netdev_addr_lock_key; - /* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -108,11 +106,6 @@ out: return NETDEV_TX_OK; } -static void br_set_lockdep_class(struct net_device *dev) -{ - lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key); -} - static int br_dev_init(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -150,7 +143,6 @@ static int br_dev_init(struct net_device *dev) br_mdb_hash_fini(br); br_fdb_hash_fini(br); } - br_set_lockdep_class(dev); return err; } diff --git a/net/core/dev.c b/net/core/dev.c index ab0edfc4a422..5722a81b6edd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -277,88 +277,6 @@ static RAW_NOTIFIER_HEAD(netdev_chain); DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); EXPORT_PER_CPU_SYMBOL(softnet_data); -#ifdef CONFIG_LOCKDEP -/* - * register_netdevice() inits txq->_xmit_lock and sets lockdep class - * according to dev->type - */ -static const unsigned short netdev_lock_type[] = { - ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, - ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, - ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, - ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, - ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, - ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, - ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, 
ARPHRD_DDCMP, - ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, - ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, - ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, - ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, - ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, - ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, - ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE, - ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; - -static const char *const netdev_lock_name[] = { - "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", - "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", - "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", - "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", - "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", - "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", - "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", - "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", - "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", - "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", - "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", - "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", - "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", - "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE", - "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; - -static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; -static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; - -static inline unsigned short netdev_lock_pos(unsigned short dev_type) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) - if (netdev_lock_type[i] == dev_type) - return i; - /* the last key is used by default */ - return ARRAY_SIZE(netdev_lock_type) - 1; -} - -static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, - unsigned short 
dev_type) -{ - int i; - - i = netdev_lock_pos(dev_type); - lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], - netdev_lock_name[i]); -} - -static inline void netdev_set_addr_lockdep_class(struct net_device *dev) -{ - int i; - - i = netdev_lock_pos(dev->type); - lockdep_set_class_and_name(&dev->addr_list_lock, - &netdev_addr_lock_key[i], - netdev_lock_name[i]); -} -#else -static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, - unsigned short dev_type) -{ -} -static inline void netdev_set_addr_lockdep_class(struct net_device *dev) -{ -} -#endif - /******************************************************************************* * * Protocol management and registration routines @@ -8799,7 +8717,7 @@ static void netdev_init_one_queue(struct net_device *dev, { /* Initialize queue lock */ spin_lock_init(&queue->_xmit_lock); - netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); + lockdep_set_class(&queue->_xmit_lock, &dev->qdisc_xmit_lock_key); queue->xmit_lock_owner = -1; netdev_queue_numa_node_write(queue, NUMA_NO_NODE); queue->dev = dev; @@ -8846,6 +8764,43 @@ void netif_tx_stop_all_queues(struct net_device *dev) } EXPORT_SYMBOL(netif_tx_stop_all_queues); +static void netdev_register_lockdep_key(struct net_device *dev) +{ + lockdep_register_key(&dev->qdisc_tx_busylock_key); + lockdep_register_key(&dev->qdisc_running_key); + lockdep_register_key(&dev->qdisc_xmit_lock_key); + lockdep_register_key(&dev->addr_list_lock_key); +} + +static void netdev_unregister_lockdep_key(struct net_device *dev) +{ + lockdep_unregister_key(&dev->qdisc_tx_busylock_key); + lockdep_unregister_key(&dev->qdisc_running_key); + lockdep_unregister_key(&dev->qdisc_xmit_lock_key); + lockdep_unregister_key(&dev->addr_list_lock_key); +} + +void netdev_update_lockdep_key(struct net_device *dev) +{ + struct netdev_queue *queue; + int i; + + lockdep_unregister_key(&dev->qdisc_xmit_lock_key); + lockdep_unregister_key(&dev->addr_list_lock_key); + + 
lockdep_register_key(&dev->qdisc_xmit_lock_key); + lockdep_register_key(&dev->addr_list_lock_key); + + lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key); + for (i = 0; i < dev->num_tx_queues; i++) { + queue = netdev_get_tx_queue(dev, i); + + lockdep_set_class(&queue->_xmit_lock, + &dev->qdisc_xmit_lock_key); + } +} +EXPORT_SYMBOL(netdev_update_lockdep_key); + /** * register_netdevice - register a network device * @dev: device to register @@ -8880,7 +8835,7 @@ int register_netdevice(struct net_device *dev) BUG_ON(!net); spin_lock_init(&dev->addr_list_lock); - netdev_set_addr_lockdep_class(dev); + lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key); ret = dev_get_valid_name(net, dev, dev->name); if (ret < 0) @@ -9390,6 +9345,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); + netdev_register_lockdep_key(dev); + dev->gso_max_size = GSO_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; dev->upper_level = 1; @@ -9474,6 +9431,8 @@ void free_netdev(struct net_device *dev) free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL; + netdev_unregister_lockdep_key(dev); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { netdev_freemem(dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1ee6460f8275..13493aae4e6c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2355,6 +2355,7 @@ static int do_set_master(struct net_device *dev, int ifindex, err = ops->ndo_del_slave(upper_dev, dev); if (err) return err; + netdev_update_lockdep_key(dev); } else { return -EOPNOTSUPP; } diff --git a/net/dsa/master.c b/net/dsa/master.c index a8e52c9967f4..3255dfc97f86 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -310,8 +310,6 @@ static void dsa_master_reset_mtu(struct net_device *dev) rtnl_unlock(); } -static struct lock_class_key dsa_master_addr_list_lock_key; - int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { 
int ret; @@ -325,9 +323,6 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) wmb(); dev->dsa_ptr = cpu_dp; - lockdep_set_class(&dev->addr_list_lock, - &dsa_master_addr_list_lock_key); - ret = dsa_master_ethtool_setup(dev); if (ret) return ret; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 75d58229a4bd..028e65f4b5ba 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1341,15 +1341,6 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) return ret; } -static struct lock_class_key dsa_slave_netdev_xmit_lock_key; -static void dsa_slave_set_lockdep_class_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, - &dsa_slave_netdev_xmit_lock_key); -} - int dsa_slave_suspend(struct net_device *slave_dev) { struct dsa_port *dp = dsa_slave_to_port(slave_dev); @@ -1433,9 +1424,6 @@ int dsa_slave_create(struct dsa_port *port) slave_dev->max_mtu = ETH_MAX_MTU; SET_NETDEV_DEVTYPE(slave_dev, &dsa_type); - netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, - NULL); - SET_NETDEV_DEV(slave_dev, port->ds->dev); slave_dev->dev.of_node = port->dn; slave_dev->vlan_features = master->vlan_features; diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 3297e7fa9945..c0b107cdd715 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -58,13 +58,6 @@ static const struct header_ops lowpan_header_ops = { .create = lowpan_header_create, }; -static int lowpan_dev_init(struct net_device *ldev) -{ - netdev_lockdep_set_classes(ldev); - - return 0; -} - static int lowpan_open(struct net_device *dev) { if (!open_count) @@ -96,7 +89,6 @@ static int lowpan_get_iflink(const struct net_device *dev) } static const struct net_device_ops lowpan_netdev_ops = { - .ndo_init = lowpan_dev_init, .ndo_start_xmit = lowpan_xmit, .ndo_open = lowpan_open, .ndo_stop = lowpan_stop, diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 
fd5ac2788e45..d3b520b9b2c9 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -56,7 +56,6 @@ static int l2tp_eth_dev_init(struct net_device *dev) { eth_hw_addr_random(dev); eth_broadcast_addr(dev->broadcast); - netdev_lockdep_set_classes(dev); return 0; } diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index c4f54ad2b98a..58d5373c513c 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -63,28 +63,6 @@ static DEFINE_SPINLOCK(nr_list_lock); static const struct proto_ops nr_proto_ops; -/* - * NETROM network devices are virtual network devices encapsulating NETROM - * frames into AX.25 which will be sent through an AX.25 device, so form a - * special "super class" of normal net devices; split their locks off into a - * separate class since they always nest. - */ -static struct lock_class_key nr_netdev_xmit_lock_key; -static struct lock_class_key nr_netdev_addr_lock_key; - -static void nr_set_lockdep_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, &nr_netdev_xmit_lock_key); -} - -static void nr_set_lockdep_key(struct net_device *dev) -{ - lockdep_set_class(&dev->addr_list_lock, &nr_netdev_addr_lock_key); - netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL); -} - /* * Socket removal during an interrupt is now safe. */ @@ -1414,7 +1392,6 @@ static int __init nr_proto_init(void) free_netdev(dev); goto fail; } - nr_set_lockdep_key(dev); dev_nr[i] = dev; } diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index f0e9ccf472a9..6a0df7c8a939 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -64,28 +64,6 @@ static const struct proto_ops rose_proto_ops; ax25_address rose_callsign; -/* - * ROSE network devices are virtual network devices encapsulating ROSE - * frames into AX.25 which will be sent through an AX.25 device, so form a - * special "super class" of normal net devices; split their locks off into a - * separate class since they always nest. 
- */ -static struct lock_class_key rose_netdev_xmit_lock_key; -static struct lock_class_key rose_netdev_addr_lock_key; - -static void rose_set_lockdep_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, &rose_netdev_xmit_lock_key); -} - -static void rose_set_lockdep_key(struct net_device *dev) -{ - lockdep_set_class(&dev->addr_list_lock, &rose_netdev_addr_lock_key); - netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL); -} - /* * Convert a ROSE address into text. */ @@ -1533,7 +1511,6 @@ static int __init rose_proto_init(void) free_netdev(dev); goto fail; } - rose_set_lockdep_key(dev); dev_rose[i] = dev; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 17bd8f539bc7..b2d34c49cbe6 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -799,9 +799,6 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { }; EXPORT_SYMBOL(pfifo_fast_ops); -static struct lock_class_key qdisc_tx_busylock; -static struct lock_class_key qdisc_running_key; - struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, struct netlink_ext_ack *extack) @@ -854,17 +851,9 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, } spin_lock_init(&sch->busylock); - lockdep_set_class(&sch->busylock, - dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); - /* seqlock has the same scope of busylock, for NOLOCK qdisc */ spin_lock_init(&sch->seqlock); - lockdep_set_class(&sch->busylock, - dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); - seqcount_init(&sch->running); - lockdep_set_class(&sch->running, - dev->qdisc_running_key ?: &qdisc_running_key); sch->ops = ops; sch->flags = ops->static_flags; @@ -875,6 +864,12 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, dev_hold(dev); refcount_set(&sch->refcnt, 1); + if (sch != &noop_qdisc) { + lockdep_set_class(&sch->busylock, &dev->qdisc_tx_busylock_key); + lockdep_set_class(&sch->seqlock, &dev->qdisc_tx_busylock_key); + 
lockdep_set_class(&sch->running, &dev->qdisc_running_key); + } + return sch; errout1: kfree(p); -- cgit v1.2.3 From 65de65d9033750d2cf1b336c9d6e9da3a8b5cc6e Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:52 +0000 Subject: bonding: fix unexpected IFF_BONDING bit unset The IFF_BONDING means bonding master or bonding slave device. ->ndo_add_slave() sets IFF_BONDING flag and ->ndo_del_slave() unsets IFF_BONDING flag. bond0<--bond1 Both bond0 and bond1 are bonding device and these should keep having IFF_BONDING flag until they are removed. But bond1 would lose IFF_BONDING at ->ndo_del_slave() because that routine do not check whether the slave device is the bonding type or not. This patch adds the interface type check routine before removing IFF_BONDING flag. Test commands: ip link add bond0 type bond ip link add bond1 type bond ip link set bond1 master bond0 ip link set bond1 nomaster ip link del bond1 type bond ip link add bond1 type bond Splat looks like: [ 226.665555] proc_dir_entry 'bonding/bond1' already registered [ 226.666440] WARNING: CPU: 0 PID: 737 at fs/proc/generic.c:361 proc_register+0x2a9/0x3e0 [ 226.667571] Modules linked in: bonding af_packet sch_fq_codel ip_tables x_tables unix [ 226.668662] CPU: 0 PID: 737 Comm: ip Not tainted 5.4.0-rc3+ #96 [ 226.669508] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 226.670652] RIP: 0010:proc_register+0x2a9/0x3e0 [ 226.671612] Code: 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 39 01 00 00 48 8b 04 24 48 89 ea 48 c7 c7 a0 0b 14 9f 48 8b b0 e 0 00 00 00 e8 07 e7 88 ff <0f> 0b 48 c7 c7 40 2d a5 9f e8 59 d6 23 01 48 8b 4c 24 10 48 b8 00 [ 226.675007] RSP: 0018:ffff888050e17078 EFLAGS: 00010282 [ 226.675761] RAX: dffffc0000000008 RBX: ffff88805fdd0f10 RCX: ffffffff9dd344e2 [ 226.676757] RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffff88806c9f6b8c [ 226.677751] RBP: ffff8880507160f3 R08: ffffed100d940019 R09: ffffed100d940019 [ 226.678761] R10: 0000000000000001 R11: 
ffffed100d940018 R12: ffff888050716008 [ 226.679757] R13: ffff8880507160f2 R14: dffffc0000000000 R15: ffffed100a0e2c1e [ 226.680758] FS: 00007fdc217cc0c0(0000) GS:ffff88806c800000(0000) knlGS:0000000000000000 [ 226.681886] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 226.682719] CR2: 00007f49313424d0 CR3: 0000000050e46001 CR4: 00000000000606f0 [ 226.683727] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 226.684725] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 226.685681] Call Trace: [ 226.687089] proc_create_seq_private+0xb3/0xf0 [ 226.687778] bond_create_proc_entry+0x1b3/0x3f0 [bonding] [ 226.691458] bond_netdev_event+0x433/0x970 [bonding] [ 226.692139] ? __module_text_address+0x13/0x140 [ 226.692779] notifier_call_chain+0x90/0x160 [ 226.693401] register_netdevice+0x9b3/0xd80 [ 226.694010] ? alloc_netdev_mqs+0x854/0xc10 [ 226.694629] ? netdev_change_features+0xa0/0xa0 [ 226.695278] ? rtnl_create_link+0x2ed/0xad0 [ 226.695849] bond_newlink+0x2a/0x60 [bonding] [ 226.696422] __rtnl_newlink+0xb9f/0x11b0 [ 226.696968] ? rtnl_link_unregister+0x220/0x220 [ ... ] Fixes: 0b680e753724 ("[PATCH] bonding: Add priv_flag to avoid event mishandling") Signed-off-by: Taehee Yoo Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ac1b09b56c77..92713b93f66f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1816,7 +1816,8 @@ err_detach: slave_disable_netpoll(new_slave); err_close: - slave_dev->priv_flags &= ~IFF_BONDING; + if (!netif_is_bond_master(slave_dev)) + slave_dev->priv_flags &= ~IFF_BONDING; dev_close(slave_dev); err_restore_mac: @@ -2017,7 +2018,8 @@ static int __bond_release_one(struct net_device *bond_dev, else dev_set_mtu(slave_dev, slave->original_mtu); - slave_dev->priv_flags &= ~IFF_BONDING; + if (!netif_is_bond_master(slave_dev)) + slave_dev->priv_flags &= ~IFF_BONDING; bond_free_slave(slave); -- cgit v1.2.3 From 089bca2caed0d0dea7da235ce1fe245808f5ec02 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:53 +0000 Subject: bonding: use dynamic lockdep key instead of subclass All bonding device has same lockdep key and subclass is initialized with nest_level. But actual nest_level value can be changed when a lower device is attached. And at this moment, the subclass should be updated but it seems to be unsafe. So this patch makes bonding use dynamic lockdep key instead of the subclass. 
Test commands: ip link add bond0 type bond for i in {1..5} do let A=$i-1 ip link add bond$i type bond ip link set bond$i master bond$A done ip link set bond5 master bond0 Splat looks like: [ 307.992912] WARNING: possible recursive locking detected [ 307.993656] 5.4.0-rc3+ #96 Tainted: G W [ 307.994367] -------------------------------------------- [ 307.995092] ip/761 is trying to acquire lock: [ 307.995710] ffff8880513aac60 (&(&bond->stats_lock)->rlock#2/2){+.+.}, at: bond_get_stats+0xb8/0x500 [bonding] [ 307.997045] but task is already holding lock: [ 307.997923] ffff88805fcbac60 (&(&bond->stats_lock)->rlock#2/2){+.+.}, at: bond_get_stats+0xb8/0x500 [bonding] [ 307.999215] other info that might help us debug this: [ 308.000251] Possible unsafe locking scenario: [ 308.001137] CPU0 [ 308.001533] ---- [ 308.001915] lock(&(&bond->stats_lock)->rlock#2/2); [ 308.002609] lock(&(&bond->stats_lock)->rlock#2/2); [ 308.003302] *** DEADLOCK *** [ 308.004310] May be due to missing lock nesting notation [ 308.005319] 3 locks held by ip/761: [ 308.005830] #0: ffffffff9fcc42b0 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x466/0x8a0 [ 308.006894] #1: ffff88805fcbac60 (&(&bond->stats_lock)->rlock#2/2){+.+.}, at: bond_get_stats+0xb8/0x500 [bonding] [ 308.008243] #2: ffffffff9f9219c0 (rcu_read_lock){....}, at: bond_get_stats+0x9f/0x500 [bonding] [ 308.009422] stack backtrace: [ 308.010124] CPU: 0 PID: 761 Comm: ip Tainted: G W 5.4.0-rc3+ #96 [ 308.011097] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 308.012179] Call Trace: [ 308.012601] dump_stack+0x7c/0xbb [ 308.013089] __lock_acquire+0x269d/0x3de0 [ 308.013669] ? register_lock_class+0x14d0/0x14d0 [ 308.014318] lock_acquire+0x164/0x3b0 [ 308.014858] ? bond_get_stats+0xb8/0x500 [bonding] [ 308.015520] _raw_spin_lock_nested+0x2e/0x60 [ 308.016129] ? bond_get_stats+0xb8/0x500 [bonding] [ 308.017215] bond_get_stats+0xb8/0x500 [bonding] [ 308.018454] ? bond_arp_rcv+0xf10/0xf10 [bonding] [ 308.019710] ? 
rcu_read_lock_held+0x90/0xa0 [ 308.020605] ? rcu_read_lock_sched_held+0xc0/0xc0 [ 308.021286] ? bond_get_stats+0x9f/0x500 [bonding] [ 308.021953] dev_get_stats+0x1ec/0x270 [ 308.022508] bond_get_stats+0x1d1/0x500 [bonding] Fixes: d3fff6c443fe ("net: add netdev_lockdep_set_classes() helper") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 10 +++++++--- include/net/bonding.h | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 92713b93f66f..6a6273590288 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3459,7 +3459,7 @@ static void bond_get_stats(struct net_device *bond_dev, struct list_head *iter; struct slave *slave; - spin_lock_nested(&bond->stats_lock, bond_get_nest_level(bond_dev)); + spin_lock(&bond->stats_lock); memcpy(stats, &bond->bond_stats, sizeof(*stats)); rcu_read_lock(); @@ -4297,8 +4297,6 @@ void bond_setup(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); - spin_lock_init(&bond->mode_lock); - spin_lock_init(&bond->stats_lock); bond->params = bonding_defaults; /* Initialize pointers */ @@ -4367,6 +4365,7 @@ static void bond_uninit(struct net_device *bond_dev) list_del(&bond->bond_list); + lockdep_unregister_key(&bond->stats_lock_key); bond_debug_unregister(bond); } @@ -4772,6 +4771,11 @@ static int bond_init(struct net_device *bond_dev) bond->nest_level = SINGLE_DEPTH_NESTING; + spin_lock_init(&bond->mode_lock); + spin_lock_init(&bond->stats_lock); + lockdep_register_key(&bond->stats_lock_key); + lockdep_set_class(&bond->stats_lock, &bond->stats_lock_key); + list_add_tail(&bond->bond_list, &bn->dev_list); bond_prepare_sysfs_group(bond); diff --git a/include/net/bonding.h b/include/net/bonding.h index f7fe45689142..334909feb2bb 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -239,6 +239,7 @@ struct bonding { struct dentry *debug_dir; #endif 
/* CONFIG_DEBUG_FS */ struct rtnl_link_stats64 bond_stats; + struct lock_class_key stats_lock_key; }; #define bond_slave_get_rcu(dev) \ -- cgit v1.2.3 From 369f61bee0f584aee09f0736431eb9b330c98571 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:54 +0000 Subject: team: fix nested locking lockdep warning A team interface could be nested and its lock variable could be nested too. But this lock uses a static lockdep key and there is no nested locking handling code such as mutex_lock_nested() and so on. So lockdep would warn about a circular locking scenario that cannot happen. In order to fix this, this patch makes the team module use a dynamic lock key instead of a static key. Test commands: ip link add team0 type team ip link add team1 type team ip link set team0 master team1 ip link set team0 nomaster ip link set team1 master team0 ip link set team1 nomaster Splat that looks like: [ 40.364352] WARNING: possible recursive locking detected [ 40.364964] 5.4.0-rc3+ #96 Not tainted [ 40.365405] -------------------------------------------- [ 40.365973] ip/750 is trying to acquire lock: [ 40.366542] ffff888060b34c40 (&team->lock){+.+.}, at: team_set_mac_address+0x151/0x290 [team] [ 40.367689] but task is already holding lock: [ 40.368729] ffff888051201c40 (&team->lock){+.+.}, at: team_del_slave+0x29/0x60 [team] [ 40.370280] other info that might help us debug this: [ 40.371159] Possible unsafe locking scenario: [ 40.371942] CPU0 [ 40.372338] ---- [ 40.372673] lock(&team->lock); [ 40.373115] lock(&team->lock); [ 40.373549] *** DEADLOCK *** [ 40.374432] May be due to missing lock nesting notation [ 40.375338] 2 locks held by ip/750: [ 40.375851] #0: ffffffffabcc42b0 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x466/0x8a0 [ 40.376927] #1: ffff888051201c40 (&team->lock){+.+.}, at: team_del_slave+0x29/0x60 [team] [ 40.377989] stack backtrace: [ 40.378650] CPU: 0 PID: 750 Comm: ip Not tainted 5.4.0-rc3+ #96 [ 40.379368] Hardware name: innotek GmbH
VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 40.380574] Call Trace: [ 40.381208] dump_stack+0x7c/0xbb [ 40.381959] __lock_acquire+0x269d/0x3de0 [ 40.382817] ? register_lock_class+0x14d0/0x14d0 [ 40.383784] ? check_chain_key+0x236/0x5d0 [ 40.384518] lock_acquire+0x164/0x3b0 [ 40.385074] ? team_set_mac_address+0x151/0x290 [team] [ 40.385805] __mutex_lock+0x14d/0x14c0 [ 40.386371] ? team_set_mac_address+0x151/0x290 [team] [ 40.387038] ? team_set_mac_address+0x151/0x290 [team] [ 40.387632] ? mutex_lock_io_nested+0x1380/0x1380 [ 40.388245] ? team_del_slave+0x60/0x60 [team] [ 40.388752] ? rcu_read_lock_sched_held+0x90/0xc0 [ 40.389304] ? rcu_read_lock_bh_held+0xa0/0xa0 [ 40.389819] ? lock_acquire+0x164/0x3b0 [ 40.390285] ? lockdep_rtnl_is_held+0x16/0x20 [ 40.390797] ? team_port_get_rtnl+0x90/0xe0 [team] [ 40.391353] ? __module_text_address+0x13/0x140 [ 40.391886] ? team_set_mac_address+0x151/0x290 [team] [ 40.392547] team_set_mac_address+0x151/0x290 [team] [ 40.393111] dev_set_mac_address+0x1f0/0x3f0 [ ... ] Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Signed-off-by: Taehee Yoo Signed-off-by: David S. 
Miller --- drivers/net/team/team.c | 16 +++++++++++++--- include/linux/if_team.h | 1 + 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 6cea83b48cad..8156b33ee3e7 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1615,7 +1615,6 @@ static int team_init(struct net_device *dev) int err; team->dev = dev; - mutex_init(&team->lock); team_set_no_mode(team); team->pcpu_stats = netdev_alloc_pcpu_stats(struct team_pcpu_stats); @@ -1642,6 +1641,9 @@ static int team_init(struct net_device *dev) goto err_options_register; netif_carrier_off(dev); + lockdep_register_key(&team->team_lock_key); + __mutex_init(&team->lock, "team->team_lock_key", &team->team_lock_key); + return 0; err_options_register: @@ -1671,6 +1673,7 @@ static void team_uninit(struct net_device *dev) team_queue_override_fini(team); mutex_unlock(&team->lock); netdev_change_features(dev); + lockdep_unregister_key(&team->team_lock_key); } static void team_destructor(struct net_device *dev) @@ -1974,8 +1977,15 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev) err = team_port_del(team, port_dev); mutex_unlock(&team->lock); - if (!err) - netdev_change_features(dev); + if (err) + return err; + + if (netif_is_team_master(port_dev)) { + lockdep_unregister_key(&team->team_lock_key); + lockdep_register_key(&team->team_lock_key); + lockdep_set_class(&team->lock, &team->team_lock_key); + } + netdev_change_features(dev); return err; } diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 06faa066496f..ec7e4bd07f82 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -223,6 +223,7 @@ struct team { atomic_t count_pending; struct delayed_work dw; } mcast_rejoin; + struct lock_class_key team_lock_key; long mode_priv[TEAM_MODE_PRIV_LONGS]; }; -- cgit v1.2.3 From 2bce1ebed17da54c65042ec2b962e3234bad5b47 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:55 
+0000 Subject: macsec: fix refcnt leak in module exit routine When a macsec interface is created, it increases a refcnt to a lower device (real device). When the macsec interface is deleted, the refcnt is decreased in macsec_free_netdev(), which is ->priv_destructor() of the macsec interface. The problem scenario is this. When nested macsec interfaces are exiting, the exit routine of the macsec module leaks the refcnt. Test commands: ip link add dummy0 type dummy ip link add macsec0 link dummy0 type macsec ip link add macsec1 link macsec0 type macsec modprobe -rv macsec [ 208.629433] unregister_netdevice: waiting for macsec0 to become free. Usage count = 1 Steps of exit routine of macsec module are below. 1. Calls ->dellink() in __rtnl_link_unregister(). 2. Checks refcnt and waits for refcnt to be 0 if refcnt is not 0 in netdev_run_todo(). 3. Calls ->priv_destructor() in netdev_run_todo(). Step2 checks refcnt, but step3 decreases refcnt. So, step2 waits forever. This patch makes the macsec module not hold a refcnt of the lower device because it already holds a refcnt of the lower device with netdev_upper_dev_link(). Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Taehee Yoo Signed-off-by: David S.
Miller --- drivers/net/macsec.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index e2a3d1d5795f..9e97b66b26d3 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3000,12 +3000,10 @@ static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = { static void macsec_free_netdev(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); - struct net_device *real_dev = macsec->real_dev; free_percpu(macsec->stats); free_percpu(macsec->secy.tx_sc.stats); - dev_put(real_dev); } static void macsec_setup(struct net_device *dev) @@ -3260,8 +3258,6 @@ static int macsec_newlink(struct net *net, struct net_device *dev, if (err < 0) return err; - dev_hold(real_dev); - macsec->nest_level = dev_get_nest_level(real_dev) + 1; err = netdev_upper_dev_link(real_dev, dev, extack); -- cgit v1.2.3 From 32b6d34fedc2229cdf6a047fdbc0704085441915 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:56 +0000 Subject: net: core: add ignore flag to netdev_adjacent structure In order to link an adjacent node, netdev_upper_dev_link() is used and in order to unlink an adjacent node, netdev_upper_dev_unlink() is used. unlink operation does not fail, but link operation can fail. In order to exchange adjacent nodes, we should unlink an old adjacent node first. then, link a new adjacent node. If link operation is failed, we should link an old adjacent node again. But this link operation can fail too. It eventually breaks the adjacent link relationship. This patch adds an ignore flag into the netdev_adjacent structure. If this flag is set, netdev_upper_dev_link() ignores an old adjacent node for a moment. This patch also adds new functions for other modules. netdev_adjacent_change_prepare() netdev_adjacent_change_commit() netdev_adjacent_change_abort() netdev_adjacent_change_prepare() inserts new device into adjacent list but new device is not allowed to use immediately. 
If netdev_adjacent_change_prepare() fails, it internally rollbacks adjacent list so that we don't need any other action. netdev_adjacent_change_commit() deletes old device in the adjacent list and allows new device to use. netdev_adjacent_change_abort() rollbacks adjacent list. Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++ net/core/dev.c | 230 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 219 insertions(+), 21 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c93df7cf187b..6c6490e15cd4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4324,6 +4324,16 @@ int netdev_master_upper_dev_link(struct net_device *dev, struct netlink_ext_ack *extack); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); +int netdev_adjacent_change_prepare(struct net_device *old_dev, + struct net_device *new_dev, + struct net_device *dev, + struct netlink_ext_ack *extack); +void netdev_adjacent_change_commit(struct net_device *old_dev, + struct net_device *new_dev, + struct net_device *dev); +void netdev_adjacent_change_abort(struct net_device *old_dev, + struct net_device *new_dev, + struct net_device *dev); void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); diff --git a/net/core/dev.c b/net/core/dev.c index 5722a81b6edd..092c094038b6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6408,6 +6408,9 @@ struct netdev_adjacent { /* upper master flag, there can only be one master device per list */ bool master; + /* lookup ignore flag */ + bool ignore; + /* counter for the number of times this device was added to us */ u16 ref_nr; @@ -6430,7 +6433,7 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev, return NULL; } -static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data) +static 
int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data) { struct net_device *dev = data; @@ -6451,7 +6454,7 @@ bool netdev_has_upper_dev(struct net_device *dev, { ASSERT_RTNL(); - return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, + return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, upper_dev); } EXPORT_SYMBOL(netdev_has_upper_dev); @@ -6469,7 +6472,7 @@ EXPORT_SYMBOL(netdev_has_upper_dev); bool netdev_has_upper_dev_all_rcu(struct net_device *dev, struct net_device *upper_dev) { - return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, + return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, upper_dev); } EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); @@ -6513,6 +6516,22 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev) } EXPORT_SYMBOL(netdev_master_upper_dev_get); +static struct net_device *__netdev_master_upper_dev_get(struct net_device *dev) +{ + struct netdev_adjacent *upper; + + ASSERT_RTNL(); + + if (list_empty(&dev->adj_list.upper)) + return NULL; + + upper = list_first_entry(&dev->adj_list.upper, + struct netdev_adjacent, list); + if (likely(upper->master) && !upper->ignore) + return upper->dev; + return NULL; +} + /** * netdev_has_any_lower_dev - Check if device is linked to some device * @dev: device @@ -6563,8 +6582,9 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); -static struct net_device *netdev_next_upper_dev(struct net_device *dev, - struct list_head **iter) +static struct net_device *__netdev_next_upper_dev(struct net_device *dev, + struct list_head **iter, + bool *ignore) { struct netdev_adjacent *upper; @@ -6574,6 +6594,7 @@ static struct net_device *netdev_next_upper_dev(struct net_device *dev, return NULL; *iter = &upper->list; + *ignore = upper->ignore; return upper->dev; } @@ -6595,14 +6616,15 @@ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, 
return upper->dev; } -static int netdev_walk_all_upper_dev(struct net_device *dev, - int (*fn)(struct net_device *dev, - void *data), - void *data) +static int __netdev_walk_all_upper_dev(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) { struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; int ret, cur = 0; + bool ignore; now = dev; iter = &dev->adj_list.upper; @@ -6616,9 +6638,11 @@ static int netdev_walk_all_upper_dev(struct net_device *dev, next = NULL; while (1) { - udev = netdev_next_upper_dev(now, &iter); + udev = __netdev_next_upper_dev(now, &iter, &ignore); if (!udev) break; + if (ignore) + continue; next = udev; niter = &udev->adj_list.upper; @@ -6688,6 +6712,15 @@ int netdev_walk_all_upper_dev_rcu(struct net_device *dev, } EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu); +static bool __netdev_has_upper_dev(struct net_device *dev, + struct net_device *upper_dev) +{ + ASSERT_RTNL(); + + return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev, + upper_dev); +} + /** * netdev_lower_get_next_private - Get the next ->private from the * lower neighbour list @@ -6784,6 +6817,23 @@ static struct net_device *netdev_next_lower_dev(struct net_device *dev, return lower->dev; } +static struct net_device *__netdev_next_lower_dev(struct net_device *dev, + struct list_head **iter, + bool *ignore) +{ + struct netdev_adjacent *lower; + + lower = list_entry((*iter)->next, struct netdev_adjacent, list); + + if (&lower->list == &dev->adj_list.lower) + return NULL; + + *iter = &lower->list; + *ignore = lower->ignore; + + return lower->dev; +} + int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *dev, void *data), @@ -6831,6 +6881,55 @@ int netdev_walk_all_lower_dev(struct net_device *dev, } EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev); +static int __netdev_walk_all_lower_dev(struct net_device *dev, + int (*fn)(struct 
net_device *dev, + void *data), + void *data) +{ + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int ret, cur = 0; + bool ignore; + + now = dev; + iter = &dev->adj_list.lower; + + while (1) { + if (now != dev) { + ret = fn(now, data); + if (ret) + return ret; + } + + next = NULL; + while (1) { + ldev = __netdev_next_lower_dev(now, &iter, &ignore); + if (!ldev) + break; + if (ignore) + continue; + + next = ldev; + niter = &ldev->adj_list.lower; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + break; + } + + if (!next) { + if (!cur) + return 0; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; + } + + return 0; +} + static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, struct list_head **iter) { @@ -6850,11 +6949,14 @@ static u8 __netdev_upper_depth(struct net_device *dev) struct net_device *udev; struct list_head *iter; u8 max_depth = 0; + bool ignore; for (iter = &dev->adj_list.upper, - udev = netdev_next_upper_dev(dev, &iter); + udev = __netdev_next_upper_dev(dev, &iter, &ignore); udev; - udev = netdev_next_upper_dev(dev, &iter)) { + udev = __netdev_next_upper_dev(dev, &iter, &ignore)) { + if (ignore) + continue; if (max_depth < udev->upper_level) max_depth = udev->upper_level; } @@ -6867,11 +6969,14 @@ static u8 __netdev_lower_depth(struct net_device *dev) struct net_device *ldev; struct list_head *iter; u8 max_depth = 0; + bool ignore; for (iter = &dev->adj_list.lower, - ldev = netdev_next_lower_dev(dev, &iter); + ldev = __netdev_next_lower_dev(dev, &iter, &ignore); ldev; - ldev = netdev_next_lower_dev(dev, &iter)) { + ldev = __netdev_next_lower_dev(dev, &iter, &ignore)) { + if (ignore) + continue; if (max_depth < ldev->lower_level) max_depth = ldev->lower_level; } @@ -7035,6 +7140,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->master = master; adj->ref_nr = 1; adj->private = 
private; + adj->ignore = false; dev_hold(adj_dev); pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n", @@ -7185,17 +7291,17 @@ static int __netdev_upper_dev_link(struct net_device *dev, return -EBUSY; /* To prevent loops, check if dev is not upper device to upper_dev. */ - if (netdev_has_upper_dev(upper_dev, dev)) + if (__netdev_has_upper_dev(upper_dev, dev)) return -EBUSY; if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV) return -EMLINK; if (!master) { - if (netdev_has_upper_dev(dev, upper_dev)) + if (__netdev_has_upper_dev(dev, upper_dev)) return -EEXIST; } else { - master_dev = netdev_master_upper_dev_get(dev); + master_dev = __netdev_master_upper_dev_get(dev); if (master_dev) return master_dev == upper_dev ? -EEXIST : -EBUSY; } @@ -7218,10 +7324,11 @@ static int __netdev_upper_dev_link(struct net_device *dev, goto rollback; __netdev_update_upper_level(dev, NULL); - netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); + __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); __netdev_update_lower_level(upper_dev, NULL); - netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL); + __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, + NULL); return 0; @@ -7307,13 +7414,94 @@ void netdev_upper_dev_unlink(struct net_device *dev, &changeupper_info.info); __netdev_update_upper_level(dev, NULL); - netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); + __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); __netdev_update_lower_level(upper_dev, NULL); - netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL); + __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, + NULL); } EXPORT_SYMBOL(netdev_upper_dev_unlink); +static void __netdev_adjacent_dev_set(struct net_device *upper_dev, + struct net_device *lower_dev, + bool val) +{ + struct netdev_adjacent *adj; + + adj = __netdev_find_adj(lower_dev, 
&upper_dev->adj_list.lower); + if (adj) + adj->ignore = val; + + adj = __netdev_find_adj(upper_dev, &lower_dev->adj_list.upper); + if (adj) + adj->ignore = val; +} + +static void netdev_adjacent_dev_disable(struct net_device *upper_dev, + struct net_device *lower_dev) +{ + __netdev_adjacent_dev_set(upper_dev, lower_dev, true); +} + +static void netdev_adjacent_dev_enable(struct net_device *upper_dev, + struct net_device *lower_dev) +{ + __netdev_adjacent_dev_set(upper_dev, lower_dev, false); +} + +int netdev_adjacent_change_prepare(struct net_device *old_dev, + struct net_device *new_dev, + struct net_device *dev, + struct netlink_ext_ack *extack) +{ + int err; + + if (!new_dev) + return 0; + + if (old_dev && new_dev != old_dev) + netdev_adjacent_dev_disable(dev, old_dev); + + err = netdev_upper_dev_link(new_dev, dev, extack); + if (err) { + if (old_dev && new_dev != old_dev) + netdev_adjacent_dev_enable(dev, old_dev); + return err; + } + + return 0; +} +EXPORT_SYMBOL(netdev_adjacent_change_prepare); + +void netdev_adjacent_change_commit(struct net_device *old_dev, + struct net_device *new_dev, + struct net_device *dev) +{ + if (!new_dev || !old_dev) + return; + + if (new_dev == old_dev) + return; + + netdev_adjacent_dev_enable(dev, old_dev); + netdev_upper_dev_unlink(old_dev, dev); +} +EXPORT_SYMBOL(netdev_adjacent_change_commit); + +void netdev_adjacent_change_abort(struct net_device *old_dev, + struct net_device *new_dev, + struct net_device *dev) +{ + if (!new_dev) + return; + + if (old_dev && new_dev != old_dev) + netdev_adjacent_dev_enable(dev, old_dev); + + netdev_upper_dev_unlink(new_dev, dev); +} +EXPORT_SYMBOL(netdev_adjacent_change_abort); + /** * netdev_bonding_info_change - Dispatch event about slave change * @dev: device -- cgit v1.2.3 From 0ce1822c2a08f6e05e22239bcb1778dcc916c7bc Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:57 +0000 Subject: vxlan: add adjacent link to limit depth level Current vxlan code doesn't limit the 
number of nested devices. Nested devices would be handled recursively and this routine needs huge stack memory. So, unlimited nested devices could make stack overflow. In order to fix this issue, this patch adds adjacent links. The adjacent link APIs internally check the depth level. Test commands: ip link add dummy0 type dummy ip link add vxlan0 type vxlan id 0 group 239.1.1.1 dev dummy0 \ dstport 4789 for i in {1..100} do let A=$i-1 ip link add vxlan$i type vxlan id $i group 239.1.1.1 \ dev vxlan$A dstport 4789 done ip link del dummy0 The top upper link is vxlan100 and the lowest link is vxlan0. When vxlan0 is deleting, the upper devices will be deleted recursively. It needs huge stack memory so it makes stack overflow. Splat looks like: [ 229.628477] ============================================================================= [ 229.629785] BUG page->ptl (Not tainted): Padding overwritten. 0x0000000026abf214-0x0000000091f6abb2 [ 229.629785] ----------------------------------------------------------------------------- [ 229.629785] [ 229.655439] ================================================================== [ 229.629785] INFO: Slab 0x00000000ff7cfda8 objects=19 used=19 fp=0x00000000fe33776c flags=0x200000000010200 [ 229.655688] BUG: KASAN: stack-out-of-bounds in unmap_single_vma+0x25a/0x2e0 [ 229.655688] Read of size 8 at addr ffff888113076928 by task vlan-network-in/2334 [ 229.655688] [ 229.629785] Padding 0000000026abf214: 00 80 14 0d 81 88 ff ff 68 91 81 14 81 88 ff ff ........h....... [ 229.629785] Padding 0000000001e24790: 38 91 81 14 81 88 ff ff 68 91 81 14 81 88 ff ff 8.......h....... [ 229.629785] Padding 00000000b39397c8: 33 30 62 a7 ff ff ff ff ff eb 60 22 10 f1 ff 1f 30b.......`".... [ 229.629785] Padding 00000000bc98f53a: 80 60 07 13 81 88 ff ff 00 80 14 0d 81 88 ff ff .`.............. [ 229.629785] Padding 000000002aa8123d: 68 91 81 14 81 88 ff ff f7 21 17 a7 ff ff ff ff h........!...... 
[ 229.629785] Padding 000000001c8c2369: 08 81 14 0d 81 88 ff ff 03 02 00 00 00 00 00 00 ................ [ 229.629785] Padding 000000004e290c5d: 21 90 a2 21 10 ed ff ff 00 00 00 00 00 fc ff df !..!............ [ 229.629785] Padding 000000000e25d731: 18 60 07 13 81 88 ff ff c0 8b 13 05 81 88 ff ff .`.............. [ 229.629785] Padding 000000007adc7ab3: b3 8a b5 41 00 00 00 00 ...A.... [ 229.629785] FIX page->ptl: Restoring 0x0000000026abf214-0x0000000091f6abb2=0x5a [ ... ] Fixes: acaf4e70997f ("net: vxlan: when lower dev unregisters remove vxlan dev as well") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 53 +++++++++++++++++++++++++++++++++++++++++++---------- include/net/vxlan.h | 1 + 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 3d9bcc957f7d..fcf028220bca 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3566,10 +3566,13 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, { struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_dev *vxlan = netdev_priv(dev); + struct net_device *remote_dev = NULL; struct vxlan_fdb *f = NULL; bool unregister = false; + struct vxlan_rdst *dst; int err; + dst = &vxlan->default_dst; err = vxlan_dev_configure(net, dev, conf, false, extack); if (err) return err; @@ -3577,14 +3580,14 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, dev->ethtool_ops = &vxlan_ethtool_ops; /* create an fdb entry for a valid default destination */ - if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) { + if (!vxlan_addr_any(&dst->remote_ip)) { err = vxlan_fdb_create(vxlan, all_zeros_mac, - &vxlan->default_dst.remote_ip, + &dst->remote_ip, NUD_REACHABLE | NUD_PERMANENT, vxlan->cfg.dst_port, - vxlan->default_dst.remote_vni, - vxlan->default_dst.remote_vni, - vxlan->default_dst.remote_ifindex, + dst->remote_vni, + dst->remote_vni, + dst->remote_ifindex, NTF_SELF, &f); if (err) return 
err; @@ -3595,26 +3598,41 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, goto errout; unregister = true; + if (dst->remote_ifindex) { + remote_dev = __dev_get_by_index(net, dst->remote_ifindex); + if (!remote_dev) + goto errout; + + err = netdev_upper_dev_link(remote_dev, dev, extack); + if (err) + goto errout; + } + err = rtnl_configure_link(dev, NULL); if (err) - goto errout; + goto unlink; if (f) { - vxlan_fdb_insert(vxlan, all_zeros_mac, - vxlan->default_dst.remote_vni, f); + vxlan_fdb_insert(vxlan, all_zeros_mac, dst->remote_vni, f); /* notify default fdb entry */ err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH, true, extack); if (err) { vxlan_fdb_destroy(vxlan, f, false, false); + if (remote_dev) + netdev_upper_dev_unlink(remote_dev, dev); goto unregister; } } list_add(&vxlan->next, &vn->vxlan_list); + if (remote_dev) + dst->remote_dev = remote_dev; return 0; - +unlink: + if (remote_dev) + netdev_upper_dev_unlink(remote_dev, dev); errout: /* unregister_netdevice() destroys the default FDB entry with deletion * notification. 
But the addition notification was not sent yet, so @@ -3932,11 +3950,12 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = netdev_priv(dev); - struct vxlan_rdst *dst = &vxlan->default_dst; struct net_device *lowerdev; struct vxlan_config conf; + struct vxlan_rdst *dst; int err; + dst = &vxlan->default_dst; err = vxlan_nl2conf(tb, data, dev, &conf, true, extack); if (err) return err; @@ -3946,6 +3965,11 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], if (err) return err; + err = netdev_adjacent_change_prepare(dst->remote_dev, lowerdev, dev, + extack); + if (err) + return err; + /* handle default dst entry */ if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) { u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni); @@ -3962,6 +3986,8 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], NTF_SELF, true, extack); if (err) { spin_unlock_bh(&vxlan->hash_lock[hash_index]); + netdev_adjacent_change_abort(dst->remote_dev, + lowerdev, dev); return err; } } @@ -3979,6 +4005,11 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], if (conf.age_interval != vxlan->cfg.age_interval) mod_timer(&vxlan->age_timer, jiffies); + netdev_adjacent_change_commit(dst->remote_dev, lowerdev, dev); + if (lowerdev && lowerdev != dst->remote_dev) + dst->remote_dev = lowerdev; + + netdev_update_lockdep_key(lowerdev); vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true); return 0; } @@ -3991,6 +4022,8 @@ static void vxlan_dellink(struct net_device *dev, struct list_head *head) list_del(&vxlan->next); unregister_netdevice_queue(dev, head); + if (vxlan->default_dst.remote_dev) + netdev_upper_dev_unlink(vxlan->default_dst.remote_dev, dev); } static size_t vxlan_get_size(const struct net_device *dev) diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 335283dbe9b3..373aadcfea21 100644 --- a/include/net/vxlan.h +++ 
b/include/net/vxlan.h @@ -197,6 +197,7 @@ struct vxlan_rdst { u8 offloaded:1; __be32 remote_vni; u32 remote_ifindex; + struct net_device *remote_dev; struct list_head list; struct rcu_head rcu; struct dst_cache dst_cache; -- cgit v1.2.3 From f3b0a18bb6cb07a9abb75e21b1f08eeaefa78e81 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:58 +0000 Subject: net: remove unnecessary variables and callback This patch removes the variables and the callback that are related to the nested device structure. Devices that can be nested have their own nest_level variable that represents the depth of nested devices. In the previous patch, new {lower/upper}_level variables were added and they replace the old private nest_level variable. So, this patch removes all 'nest_level' variables. In order to avoid a lockdep warning, ->ndo_get_lock_subclass() was added to get the lockdep subclass value, which is actually the lower nested depth value. But now, they use the dynamic lockdep key to avoid the lockdep warning instead of the subclass. So, this patch removes the ->ndo_get_lock_subclass() callback. Signed-off-by: Taehee Yoo Signed-off-by: David S.
Miller --- drivers/net/bonding/bond_alb.c | 2 +- drivers/net/bonding/bond_main.c | 15 --------------- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- drivers/net/macsec.c | 9 --------- drivers/net/macvlan.c | 7 ------- include/linux/if_macvlan.h | 1 - include/linux/if_vlan.h | 11 ----------- include/linux/netdevice.h | 12 ------------ include/net/bonding.h | 1 - net/8021q/vlan.c | 1 - net/8021q/vlan_dev.c | 6 ------ net/core/dev.c | 19 ------------------- net/core/dev_addr_lists.c | 12 ++++++------ net/smc/smc_core.c | 2 +- net/smc/smc_pnet.c | 2 +- 15 files changed, 10 insertions(+), 92 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 8c79bad2a9a5..4f2e6910c623 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -952,7 +952,7 @@ static int alb_upper_dev_walk(struct net_device *upper, void *_data) struct bond_vlan_tag *tags; if (is_vlan_dev(upper) && - bond->nest_level == vlan_get_encap_level(upper) - 1) { + bond->dev->lower_level == upper->lower_level - 1) { if (upper->addr_assign_type == NET_ADDR_STOLEN) { alb_send_lp_vid(slave, mac_addr, vlan_dev_vlan_proto(upper), diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6a6273590288..a48950b81434 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1733,8 +1733,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, goto err_upper_unlink; } - bond->nest_level = dev_get_nest_level(bond_dev) + 1; - /* If the mode uses primary, then the following is handled by * bond_change_active_slave(). 
*/ @@ -1957,9 +1955,6 @@ static int __bond_release_one(struct net_device *bond_dev, if (!bond_has_slaves(bond)) { bond_set_carrier(bond); eth_hw_addr_random(bond_dev); - bond->nest_level = SINGLE_DEPTH_NESTING; - } else { - bond->nest_level = dev_get_nest_level(bond_dev) + 1; } unblock_netpoll_tx(); @@ -3444,13 +3439,6 @@ static void bond_fold_stats(struct rtnl_link_stats64 *_res, } } -static int bond_get_nest_level(struct net_device *bond_dev) -{ - struct bonding *bond = netdev_priv(bond_dev); - - return bond->nest_level; -} - static void bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 *stats) { @@ -4270,7 +4258,6 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_neigh_setup = bond_neigh_setup, .ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid, - .ndo_get_lock_subclass = bond_get_nest_level, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_netpoll_setup = bond_netpoll_setup, .ndo_netpoll_cleanup = bond_netpoll_cleanup, @@ -4769,8 +4756,6 @@ static int bond_init(struct net_device *bond_dev) if (!bond->wq) return -ENOMEM; - bond->nest_level = SINGLE_DEPTH_NESTING; - spin_lock_init(&bond->mode_lock); spin_lock_init(&bond->stats_lock); lockdep_register_key(&bond->stats_lock_key); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3e78a727f3e6..c4c59d2e676e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3160,7 +3160,7 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr, u32 *action) { - int nest_level = vlan_get_encap_level(attr->parse_attr->filter_dev); + int nest_level = attr->parse_attr->filter_dev->lower_level; struct flow_action_entry vlan_act = { .id = FLOW_ACTION_VLAN_POP, }; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 9e97b66b26d3..afd8b2a08245 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c 
@@ -267,7 +267,6 @@ struct macsec_dev { struct pcpu_secy_stats __percpu *stats; struct list_head secys; struct gro_cells gro_cells; - unsigned int nest_level; }; /** @@ -2957,11 +2956,6 @@ static int macsec_get_iflink(const struct net_device *dev) return macsec_priv(dev)->real_dev->ifindex; } -static int macsec_get_nest_level(struct net_device *dev) -{ - return macsec_priv(dev)->nest_level; -} - static const struct net_device_ops macsec_netdev_ops = { .ndo_init = macsec_dev_init, .ndo_uninit = macsec_dev_uninit, @@ -2975,7 +2969,6 @@ static const struct net_device_ops macsec_netdev_ops = { .ndo_start_xmit = macsec_start_xmit, .ndo_get_stats64 = macsec_get_stats64, .ndo_get_iflink = macsec_get_iflink, - .ndo_get_lock_subclass = macsec_get_nest_level, }; static const struct device_type macsec_type = { @@ -3258,8 +3251,6 @@ static int macsec_newlink(struct net *net, struct net_device *dev, if (err < 0) return err; - macsec->nest_level = dev_get_nest_level(real_dev) + 1; - err = netdev_upper_dev_link(real_dev, dev, extack); if (err < 0) goto unregister; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 0354e9be2ca5..34fc59bd1e20 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -867,11 +867,6 @@ static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) #define MACVLAN_STATE_MASK \ ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) -static int macvlan_get_nest_level(struct net_device *dev) -{ - return ((struct macvlan_dev *)netdev_priv(dev))->nest_level; -} - static int macvlan_init(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); @@ -1149,7 +1144,6 @@ static const struct net_device_ops macvlan_netdev_ops = { .ndo_fdb_add = macvlan_fdb_add, .ndo_fdb_del = macvlan_fdb_del, .ndo_fdb_dump = ndo_dflt_fdb_dump, - .ndo_get_lock_subclass = macvlan_get_nest_level, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = macvlan_dev_poll_controller, .ndo_netpoll_setup = macvlan_dev_netpoll_setup, 
@@ -1433,7 +1427,6 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, vlan->dev = dev; vlan->port = port; vlan->set_features = MACVLAN_FEATURES; - vlan->nest_level = dev_get_nest_level(lowerdev) + 1; vlan->mode = MACVLAN_MODE_VEPA; if (data && data[IFLA_MACVLAN_MODE]) diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 2e55e4cdbd8a..a367ead4bf4b 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -29,7 +29,6 @@ struct macvlan_dev { netdev_features_t set_features; enum macvlan_mode mode; u16 flags; - int nest_level; unsigned int macaddr_count; #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 244278d5c222..b05e855f1ddd 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -182,7 +182,6 @@ struct vlan_dev_priv { #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; #endif - unsigned int nest_level; }; static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev) @@ -221,11 +220,6 @@ extern void vlan_vids_del_by_dev(struct net_device *dev, extern bool vlan_uses_dev(const struct net_device *dev); -static inline int vlan_get_encap_level(struct net_device *dev) -{ - BUG_ON(!is_vlan_dev(dev)); - return vlan_dev_priv(dev)->nest_level; -} #else static inline struct net_device * __vlan_find_dev_deep_rcu(struct net_device *real_dev, @@ -295,11 +289,6 @@ static inline bool vlan_uses_dev(const struct net_device *dev) { return false; } -static inline int vlan_get_encap_level(struct net_device *dev) -{ - BUG(); - return 0; -} #endif /** diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6c6490e15cd4..c20f190b4c18 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1422,7 +1422,6 @@ struct net_device_ops { void (*ndo_dfwd_del_station)(struct net_device *pdev, void *priv); - int (*ndo_get_lock_subclass)(struct net_device *dev); int 
(*ndo_set_tx_maxrate)(struct net_device *dev, int queue_index, u32 maxrate); @@ -4051,16 +4050,6 @@ static inline void netif_addr_lock(struct net_device *dev) spin_lock(&dev->addr_list_lock); } -static inline void netif_addr_lock_nested(struct net_device *dev) -{ - int subclass = SINGLE_DEPTH_NESTING; - - if (dev->netdev_ops->ndo_get_lock_subclass) - subclass = dev->netdev_ops->ndo_get_lock_subclass(dev); - - spin_lock_nested(&dev->addr_list_lock, subclass); -} - static inline void netif_addr_lock_bh(struct net_device *dev) { spin_lock_bh(&dev->addr_list_lock); @@ -4345,7 +4334,6 @@ void netdev_lower_state_changed(struct net_device *lower_dev, extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; void netdev_rss_key_fill(void *buffer, size_t len); -int dev_get_nest_level(struct net_device *dev); int skb_checksum_help(struct sk_buff *skb); int skb_crc32c_csum_help(struct sk_buff *skb); int skb_csum_hwoffload_help(struct sk_buff *skb, diff --git a/include/net/bonding.h b/include/net/bonding.h index 334909feb2bb..1afc125014da 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -203,7 +203,6 @@ struct bonding { struct slave __rcu *primary_slave; struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */ bool force_primary; - u32 nest_level; s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ int (*recv_probe)(const struct sk_buff *, struct bonding *, struct slave *); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 54728d2eda18..d4bcfd8f95bf 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -172,7 +172,6 @@ int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack) if (err < 0) goto out_uninit_mvrp; - vlan->nest_level = dev_get_nest_level(real_dev) + 1; err = register_netdevice(dev); if (err < 0) goto out_uninit_mvrp; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 6e6f26bf6e73..e5bff5cc6f97 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -489,11 
+489,6 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); } -static int vlan_dev_get_lock_subclass(struct net_device *dev) -{ - return vlan_dev_priv(dev)->nest_level; -} - static const struct header_ops vlan_header_ops = { .create = vlan_dev_hard_header, .parse = eth_header_parse, @@ -785,7 +780,6 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup, #endif .ndo_fix_features = vlan_dev_fix_features, - .ndo_get_lock_subclass = vlan_dev_get_lock_subclass, .ndo_get_iflink = vlan_dev_get_iflink, }; diff --git a/net/core/dev.c b/net/core/dev.c index 092c094038b6..1482e2ef2d25 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7615,25 +7615,6 @@ void *netdev_lower_dev_get_private(struct net_device *dev, EXPORT_SYMBOL(netdev_lower_dev_get_private); -int dev_get_nest_level(struct net_device *dev) -{ - struct net_device *lower = NULL; - struct list_head *iter; - int max_nest = -1; - int nest; - - ASSERT_RTNL(); - - netdev_for_each_lower_dev(dev, lower, iter) { - nest = dev_get_nest_level(lower); - if (max_nest < nest) - max_nest = nest; - } - - return max_nest + 1; -} -EXPORT_SYMBOL(dev_get_nest_level); - /** * netdev_lower_change - Dispatch event about lower device state change * @lower_dev: device diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 6393ba930097..2f949b5a1eb9 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -637,7 +637,7 @@ int dev_uc_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -667,7 +667,7 @@ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = 
__hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -691,7 +691,7 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from) return; netif_addr_lock_bh(from); - netif_addr_lock_nested(to); + netif_addr_lock(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); @@ -858,7 +858,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -888,7 +888,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -912,7 +912,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) return; netif_addr_lock_bh(from); - netif_addr_lock_nested(to); + netif_addr_lock(to); __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 88556f0251ab..2ba97ff325a5 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -561,7 +561,7 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini) } rtnl_lock(); - nest_lvl = dev_get_nest_level(ndev); + nest_lvl = ndev->lower_level; for (i = 0; i < nest_lvl; i++) { struct list_head *lower = &ndev->adj_list.lower; diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index bab2da8cf17a..2920b006f65c 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -718,7 +718,7 @@ static struct net_device *pnet_find_base_ndev(struct net_device *ndev) int i, nest_lvl; rtnl_lock(); - nest_lvl = dev_get_nest_level(ndev); + nest_lvl = ndev->lower_level; for (i = 0; i < nest_lvl; i++) { struct list_head 
*lower = &ndev->adj_list.lower; -- cgit v1.2.3 From 1962f86b42ed06ea6af9ff09390243b99d9eb83a Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:59 +0000 Subject: virt_wifi: fix refcnt leak in module exit routine virt_wifi_newlink() calls netdev_upper_dev_link() and it internally holds reference count of lower interface. Current code does not release a reference count of the lower interface when the lower interface is being deleted. So, reference count leaks occur. Test commands: ip link add dummy0 type dummy ip link add vw1 link dummy0 type virt_wifi ip link del dummy0 Splat looks like: [ 133.787526][ T788] WARNING: CPU: 1 PID: 788 at net/core/dev.c:8274 rollback_registered_many+0x835/0xc80 [ 133.788355][ T788] Modules linked in: virt_wifi cfg80211 dummy team af_packet sch_fq_codel ip_tables x_tables unix [ 133.789377][ T788] CPU: 1 PID: 788 Comm: ip Not tainted 5.4.0-rc3+ #96 [ 133.790069][ T788] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 133.791167][ T788] RIP: 0010:rollback_registered_many+0x835/0xc80 [ 133.791906][ T788] Code: 00 4d 85 ff 0f 84 b5 fd ff ff ba c0 0c 00 00 48 89 de 4c 89 ff e8 9b 58 04 00 48 89 df e8 30 [ 133.794317][ T788] RSP: 0018:ffff88805ba3f338 EFLAGS: 00010202 [ 133.795080][ T788] RAX: ffff88805e57e801 RBX: ffff88805ba34000 RCX: ffffffffa9294723 [ 133.796045][ T788] RDX: 1ffff1100b746816 RSI: 0000000000000008 RDI: ffffffffabcc4240 [ 133.797006][ T788] RBP: ffff88805ba3f4c0 R08: fffffbfff5798849 R09: fffffbfff5798849 [ 133.797993][ T788] R10: 0000000000000001 R11: fffffbfff5798848 R12: dffffc0000000000 [ 133.802514][ T788] R13: ffff88805ba3f440 R14: ffff88805ba3f400 R15: ffff88805ed622c0 [ 133.803237][ T788] FS: 00007f2e9608c0c0(0000) GS:ffff88806cc00000(0000) knlGS:0000000000000000 [ 133.804002][ T788] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 133.804664][ T788] CR2: 00007f2e95610603 CR3: 000000005f68c004 CR4: 00000000000606e0 [ 133.805363][ T788] DR0: 0000000000000000 DR1: 
0000000000000000 DR2: 0000000000000000 [ 133.806073][ T788] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 133.806787][ T788] Call Trace: [ 133.807069][ T788] ? generic_xdp_install+0x310/0x310 [ 133.807612][ T788] ? lock_acquire+0x164/0x3b0 [ 133.808077][ T788] ? is_bpf_text_address+0x5/0xf0 [ 133.808640][ T788] ? deref_stack_reg+0x9c/0xd0 [ 133.809138][ T788] ? __nla_validate_parse+0x98/0x1ab0 [ 133.809944][ T788] unregister_netdevice_many.part.122+0x13/0x1b0 [ 133.810599][ T788] rtnl_delete_link+0xbc/0x100 [ 133.811073][ T788] ? rtnl_af_register+0xc0/0xc0 [ 133.811672][ T788] rtnl_dellink+0x30e/0x8a0 [ 133.812205][ T788] ? is_bpf_text_address+0x5/0xf0 [ ... ] [ 144.110530][ T788] unregister_netdevice: waiting for dummy0 to become free. Usage count = 1 This patch adds notifier routine to delete upper interface before deleting lower interface. Fixes: c7cdba31ed8b ("mac80211-next: rtnetlink wifi simulation device") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/wireless/virt_wifi.c | 54 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c index be92e1220284..7997cc6de334 100644 --- a/drivers/net/wireless/virt_wifi.c +++ b/drivers/net/wireless/virt_wifi.c @@ -548,6 +548,7 @@ static int virt_wifi_newlink(struct net *src_net, struct net_device *dev, priv->is_connected = false; priv->is_up = false; INIT_DELAYED_WORK(&priv->connect, virt_wifi_connect_complete); + __module_get(THIS_MODULE); return 0; unregister_netdev: @@ -578,6 +579,7 @@ static void virt_wifi_dellink(struct net_device *dev, netdev_upper_dev_unlink(priv->lowerdev, dev); unregister_netdevice_queue(dev, head); + module_put(THIS_MODULE); /* Deleting the wiphy is handled in the module destructor. 
*/ } @@ -590,6 +592,42 @@ static struct rtnl_link_ops virt_wifi_link_ops = { .priv_size = sizeof(struct virt_wifi_netdev_priv), }; +static bool netif_is_virt_wifi_dev(const struct net_device *dev) +{ + return rcu_access_pointer(dev->rx_handler) == virt_wifi_rx_handler; +} + +static int virt_wifi_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *lower_dev = netdev_notifier_info_to_dev(ptr); + struct virt_wifi_netdev_priv *priv; + struct net_device *upper_dev; + LIST_HEAD(list_kill); + + if (!netif_is_virt_wifi_dev(lower_dev)) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UNREGISTER: + priv = rtnl_dereference(lower_dev->rx_handler_data); + if (!priv) + return NOTIFY_DONE; + + upper_dev = priv->upperdev; + + upper_dev->rtnl_link_ops->dellink(upper_dev, &list_kill); + unregister_netdevice_many(&list_kill); + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block virt_wifi_notifier = { + .notifier_call = virt_wifi_event, +}; + /* Acquires and releases the rtnl lock. */ static int __init virt_wifi_init_module(void) { @@ -598,14 +636,25 @@ static int __init virt_wifi_init_module(void) /* Guaranteed to be locallly-administered and not multicast. */ eth_random_addr(fake_router_bssid); + err = register_netdevice_notifier(&virt_wifi_notifier); + if (err) + return err; + + err = -ENOMEM; common_wiphy = virt_wifi_make_wiphy(); if (!common_wiphy) - return -ENOMEM; + goto notifier; err = rtnl_link_register(&virt_wifi_link_ops); if (err) - virt_wifi_destroy_wiphy(common_wiphy); + goto destroy_wiphy; + return 0; + +destroy_wiphy: + virt_wifi_destroy_wiphy(common_wiphy); +notifier: + unregister_netdevice_notifier(&virt_wifi_notifier); return err; } @@ -615,6 +664,7 @@ static void __exit virt_wifi_cleanup_module(void) /* Will delete any devices that depend on the wiphy. 
*/ rtnl_link_unregister(&virt_wifi_link_ops); virt_wifi_destroy_wiphy(common_wiphy); + unregister_netdevice_notifier(&virt_wifi_notifier); } module_init(virt_wifi_init_module); -- cgit v1.2.3 From 549af00833028b5803528553a4743e0cd1fdbee9 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 15 Oct 2019 11:07:33 +0300 Subject: IB/core: Avoid deadlock during netlink message handling When rdmacm module is not loaded, and when netlink message is received to get char device info, it results into a deadlock due to recursive locking of rdma_nl_mutex with the below call sequence. [..] rdma_nl_rcv() mutex_lock() [..] rdma_nl_rcv_msg() ib_get_client_nl_info() request_module() iw_cm_init() rdma_nl_register() mutex_lock(); <- Deadlock, acquiring mutex again Due to above call sequence, following call trace and deadlock is observed. kernel: __mutex_lock+0x35e/0x860 kernel: ? __mutex_lock+0x129/0x860 kernel: ? rdma_nl_register+0x1a/0x90 [ib_core] kernel: rdma_nl_register+0x1a/0x90 [ib_core] kernel: ? 0xffffffffc029b000 kernel: iw_cm_init+0x34/0x1000 [iw_cm] kernel: do_one_initcall+0x67/0x2d4 kernel: ? kmem_cache_alloc_trace+0x1ec/0x2a0 kernel: do_init_module+0x5a/0x223 kernel: load_module+0x1998/0x1e10 kernel: ? __symbol_put+0x60/0x60 kernel: __do_sys_finit_module+0x94/0xe0 kernel: do_syscall_64+0x5a/0x270 kernel: entry_SYSCALL_64_after_hwframe+0x49/0xbe process stack trace: [<0>] __request_module+0x1c9/0x460 [<0>] ib_get_client_nl_info+0x5e/0xb0 [ib_core] [<0>] nldev_get_chardev+0x1ac/0x320 [ib_core] [<0>] rdma_nl_rcv_msg+0xeb/0x1d0 [ib_core] [<0>] rdma_nl_rcv+0xcd/0x120 [ib_core] [<0>] netlink_unicast+0x179/0x220 [<0>] netlink_sendmsg+0x2f6/0x3f0 [<0>] sock_sendmsg+0x30/0x40 [<0>] ___sys_sendmsg+0x27a/0x290 [<0>] __sys_sendmsg+0x58/0xa0 [<0>] do_syscall_64+0x5a/0x270 [<0>] entry_SYSCALL_64_after_hwframe+0x49/0xbe To overcome this deadlock and to allow multiple netlink messages to progress in parallel, following scheme is implemented. 1. 
Split the lock protecting the cb_table into a per-index lock, and make it a rwlock. This lock is used to ensure no callbacks are running after unregistration returns. Since a module will not be registered once it is already running callbacks, this avoids the deadlock. 2. Use smp_store_release() to update the cb_table during registration so that no lock is required. This avoids lockdep problems with thinking all the rwsems are the same lock class. Fixes: 0e2d00eb6fd45 ("RDMA: Add NLDEV_GET_CHARDEV to allow char dev discovery and autoload") Link: https://lore.kernel.org/r/20191015080733.18625-1-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 1 + drivers/infiniband/core/device.c | 2 + drivers/infiniband/core/netlink.c | 107 ++++++++++++++++++------------------ 3 files changed, 56 insertions(+), 54 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 3a8b0911c3bc..9d07378b5b42 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -199,6 +199,7 @@ void ib_mad_cleanup(void); int ib_sa_init(void); void ib_sa_cleanup(void); +void rdma_nl_init(void); void rdma_nl_exit(void); int ib_nl_handle_resolve_resp(struct sk_buff *skb, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 2dd2cfe9b561..50a92442c4f7 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2716,6 +2716,8 @@ static int __init ib_core_init(void) goto err_comp_unbound; } + rdma_nl_init(); + ret = addr_init(); if (ret) { pr_warn("Could't init IB address resolution\n"); diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 81dbd5f41bed..8cd31ef25eff 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -42,9 +42,12 @@ #include #include "core_priv.h" -static DEFINE_MUTEX(rdma_nl_mutex); 
static struct { - const struct rdma_nl_cbs *cb_table; + const struct rdma_nl_cbs *cb_table; + /* Synchronizes between ongoing netlink commands and netlink client + * unregistration. + */ + struct rw_semaphore sem; } rdma_nl_types[RDMA_NL_NUM_CLIENTS]; bool rdma_nl_chk_listeners(unsigned int group) @@ -75,70 +78,53 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op) return (op < max_num_ops[type]) ? true : false; } -static bool -is_nl_valid(const struct sk_buff *skb, unsigned int type, unsigned int op) +static const struct rdma_nl_cbs * +get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op) { const struct rdma_nl_cbs *cb_table; - if (!is_nl_msg_valid(type, op)) - return false; - /* * Currently only NLDEV client is supporting netlink commands in * non init_net net namespace. */ if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV) - return false; + return NULL; - if (!rdma_nl_types[type].cb_table) { - mutex_unlock(&rdma_nl_mutex); - request_module("rdma-netlink-subsys-%d", type); - mutex_lock(&rdma_nl_mutex); - } + cb_table = READ_ONCE(rdma_nl_types[type].cb_table); + if (!cb_table) { + /* + * Didn't get valid reference of the table, attempt module + * load once. + */ + up_read(&rdma_nl_types[type].sem); - cb_table = rdma_nl_types[type].cb_table; + request_module("rdma-netlink-subsys-%d", type); + down_read(&rdma_nl_types[type].sem); + cb_table = READ_ONCE(rdma_nl_types[type].cb_table); + } if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit)) - return false; - return true; + return NULL; + return cb_table; } void rdma_nl_register(unsigned int index, const struct rdma_nl_cbs cb_table[]) { - mutex_lock(&rdma_nl_mutex); - if (!is_nl_msg_valid(index, 0)) { - /* - * All clients are not interesting in success/failure of - * this call. They want to see the print to error log and - * continue their initialization. Print warning for them, - * because it is programmer's error to be here. 
- */ - mutex_unlock(&rdma_nl_mutex); - WARN(true, - "The not-valid %u index was supplied to RDMA netlink\n", - index); + if (WARN_ON(!is_nl_msg_valid(index, 0)) || + WARN_ON(READ_ONCE(rdma_nl_types[index].cb_table))) return; - } - - if (rdma_nl_types[index].cb_table) { - mutex_unlock(&rdma_nl_mutex); - WARN(true, - "The %u index is already registered in RDMA netlink\n", - index); - return; - } - rdma_nl_types[index].cb_table = cb_table; - mutex_unlock(&rdma_nl_mutex); + /* Pairs with the READ_ONCE in is_nl_valid() */ + smp_store_release(&rdma_nl_types[index].cb_table, cb_table); } EXPORT_SYMBOL(rdma_nl_register); void rdma_nl_unregister(unsigned int index) { - mutex_lock(&rdma_nl_mutex); + down_write(&rdma_nl_types[index].sem); rdma_nl_types[index].cb_table = NULL; - mutex_unlock(&rdma_nl_mutex); + up_write(&rdma_nl_types[index].sem); } EXPORT_SYMBOL(rdma_nl_unregister); @@ -170,15 +156,21 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int index = RDMA_NL_GET_CLIENT(type); unsigned int op = RDMA_NL_GET_OP(type); const struct rdma_nl_cbs *cb_table; + int err = -EINVAL; - if (!is_nl_valid(skb, index, op)) + if (!is_nl_msg_valid(index, op)) return -EINVAL; - cb_table = rdma_nl_types[index].cb_table; + down_read(&rdma_nl_types[index].sem); + cb_table = get_cb_table(skb, index, op); + if (!cb_table) + goto done; if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) && - !netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; + !netlink_capable(skb, CAP_NET_ADMIN)) { + err = -EPERM; + goto done; + } /* * LS responses overload the 0x100 (NLM_F_ROOT) flag. 
Don't @@ -186,8 +178,8 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, */ if (index == RDMA_NL_LS) { if (cb_table[op].doit) - return cb_table[op].doit(skb, nlh, extack); - return -EINVAL; + err = cb_table[op].doit(skb, nlh, extack); + goto done; } /* FIXME: Convert IWCM to properly handle doit callbacks */ if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) { @@ -195,14 +187,15 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, .dump = cb_table[op].dump, }; if (c.dump) - return netlink_dump_start(skb->sk, skb, nlh, &c); - return -EINVAL; + err = netlink_dump_start(skb->sk, skb, nlh, &c); + goto done; } if (cb_table[op].doit) - return cb_table[op].doit(skb, nlh, extack); - - return 0; + err = cb_table[op].doit(skb, nlh, extack); +done: + up_read(&rdma_nl_types[index].sem); + return err; } /* @@ -263,9 +256,7 @@ skip: static void rdma_nl_rcv(struct sk_buff *skb) { - mutex_lock(&rdma_nl_mutex); rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg); - mutex_unlock(&rdma_nl_mutex); } int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid) @@ -297,6 +288,14 @@ int rdma_nl_multicast(struct net *net, struct sk_buff *skb, } EXPORT_SYMBOL(rdma_nl_multicast); +void rdma_nl_init(void) +{ + int idx; + + for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++) + init_rwsem(&rdma_nl_types[idx].sem); +} + void rdma_nl_exit(void) { int idx; -- cgit v1.2.3 From fc5b220b2dcf8b512d9bd46fd17f82257e49bf89 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 23 Oct 2019 13:21:50 -0700 Subject: scsi: target: cxgbit: Fix cxgbit_fw4_ack() Use the pointer 'p' after having tested that pointer instead of before. Fixes: 5cadafb236df ("target/cxgbit: Fix endianness annotations") Cc: Varun Prakash Cc: Nicholas Bellinger Cc: Link: https://lore.kernel.org/r/20191023202150.22173-1-bvanassche@acm.org Reported-by: Dan Carpenter Signed-off-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- drivers/target/iscsi/cxgbit/cxgbit_cm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 22dd4c457d6a..23a90c685dc6 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -1829,7 +1829,7 @@ static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb) while (credits) { struct sk_buff *p = cxgbit_sock_peek_wr(csk); - const u32 csum = (__force u32)p->csum; + u32 csum; if (unlikely(!p)) { pr_err("csk 0x%p,%u, cr %u,%u+%u, empty.\n", @@ -1838,6 +1838,7 @@ static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb) break; } + csum = (__force u32)p->csum; if (unlikely(credits < csum)) { pr_warn("csk 0x%p,%u, cr %u,%u+%u, < %u.\n", csk, csk->tid, -- cgit v1.2.3 From 0cf9f4e547cebb5f5d2d046437c71ddcc8ea4a39 Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Tue, 22 Oct 2019 14:27:08 +0800 Subject: scsi: sd: define variable dif as unsigned int instead of bool Variable dif in function sd_setup_read_write_cmnd() is the return value of function scsi_host_dif_capable() which returns dif capability of disks. If define it as bool, even for the disks which support DIF3, the function still return dif=1, which causes IO error. So define variable dif as unsigned int instead of bool. Fixes: e249e42d277e ("scsi: sd: Clean up sd_setup_read_write_cmnd()") Link: https://lore.kernel.org/r/1571725628-132736-1-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Xiang Chen Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- drivers/scsi/sd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 0f96eb0ddbfa..fe05475ce5dc 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1166,11 +1166,12 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) sector_t lba = sectors_to_logical(sdp, blk_rq_pos(rq)); sector_t threshold; unsigned int nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); - bool dif, dix; unsigned int mask = logical_to_sectors(sdp, 1) - 1; bool write = rq_data_dir(rq) == WRITE; unsigned char protect, fua; blk_status_t ret; + unsigned int dif; + bool dix; ret = scsi_init_io(cmd); if (ret != BLK_STS_OK) -- cgit v1.2.3 From 7eb3894b2fac978f811684e3ccb3cb0ad7820bef Mon Sep 17 00:00:00 2001 From: Yuantian Tang Date: Thu, 10 Oct 2019 16:33:34 +0800 Subject: arm64: dts: ls1028a: fix a compatible issue The I2C multiplexer used on ls1028aqds is PCA9547, not PCA9847. If the wrong compatible was used, this chip will not be able to be probed correctly and hence fail to work. 
Signed-off-by: Yuantian Tang Acked-by: Li Yang Fixes: 8897f3255c9c ("arm64: dts: Add support for NXP LS1028A SoC") Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts index d98346da01df..078a5010228c 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts @@ -127,7 +127,7 @@ status = "okay"; i2c-mux@77 { - compatible = "nxp,pca9847"; + compatible = "nxp,pca9547"; reg = <0x77>; #address-cells = <1>; #size-cells = <0>; -- cgit v1.2.3 From e9323b664ce29547d996195e8a6129a351c39108 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 25 Oct 2019 11:02:01 +0200 Subject: clk: samsung: exynos5420: Preserve PLL configuration during suspend/resume Properly save and restore all top PLL related configuration registers during suspend/resume cycle. So far driver only handled EPLL and RPLL clocks, all other were reset to default values after suspend/resume cycle. This caused for example lower G3D (MALI Panfrost) performance after system resume, even if performance governor has been selected. 
Reported-by: Marian Mihailescu Fixes: 773424326b51 ("clk: samsung: exynos5420: add more registers to restore list") Signed-off-by: Marek Szyprowski Signed-off-by: Sylwester Nawrocki --- drivers/clk/samsung/clk-exynos5420.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c index dfa862d55246..31466cd1842f 100644 --- a/drivers/clk/samsung/clk-exynos5420.c +++ b/drivers/clk/samsung/clk-exynos5420.c @@ -165,12 +165,18 @@ static const unsigned long exynos5x_clk_regs[] __initconst = { GATE_BUS_CPU, GATE_SCLK_CPU, CLKOUT_CMU_CPU, + CPLL_CON0, + DPLL_CON0, EPLL_CON0, EPLL_CON1, EPLL_CON2, RPLL_CON0, RPLL_CON1, RPLL_CON2, + IPLL_CON0, + SPLL_CON0, + VPLL_CON0, + MPLL_CON0, SRC_TOP0, SRC_TOP1, SRC_TOP2, -- cgit v1.2.3 From a1bb46c36ce389d4a24a42e5b6047b0626caa3ea Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 25 Oct 2019 02:41:40 +0200 Subject: ACPI: processor: Add QoS requests for all CPUs The _PPC change notifications from the platform firmware are per-CPU, so acpi_processor_ppc_init() needs to add a frequency QoS request for each CPU covered by a cpufreq policy to take all of them into account. Even though ACPI thermal control of CPUs sets frequency limits per processor package, it also needs a frequency QoS request for each CPU in a cpufreq policy in case some of them are taken offline and the frequency limit needs to be set through the remaining online ones (this is slightly excessive, because all CPUs covered by one cpufreq policy will set the same frequency limit through their QoS requests, but it is not incorrect). Modify the code in accordance with the above observations. Fixes: d15ce412737a ("ACPI: cpufreq: Switch to QoS requests instead of cpufreq notifier") Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/acpi/processor_perflib.c | 34 +++++++++++++++++++++------------- drivers/acpi/processor_thermal.c | 34 +++++++++++++++++++++------------- 2 files changed, 42 insertions(+), 26 deletions(-) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 753e171de006..5909e8fa4013 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -159,26 +159,34 @@ void acpi_processor_ignore_ppc_init(void) void acpi_processor_ppc_init(struct cpufreq_policy *policy) { - int cpu = policy->cpu; - struct acpi_processor *pr = per_cpu(processors, cpu); - int ret; + unsigned int cpu; - if (!pr) - return; + for_each_cpu(cpu, policy->related_cpus) { + struct acpi_processor *pr = per_cpu(processors, cpu); + int ret; + + if (!pr) + continue; - ret = freq_qos_add_request(&policy->constraints, &pr->perflib_req, - FREQ_QOS_MAX, INT_MAX); - if (ret < 0) - pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu, - ret); + ret = freq_qos_add_request(&policy->constraints, + &pr->perflib_req, + FREQ_QOS_MAX, INT_MAX); + if (ret < 0) + pr_err("Failed to add freq constraint for CPU%d (%d)\n", + cpu, ret); + } } void acpi_processor_ppc_exit(struct cpufreq_policy *policy) { - struct acpi_processor *pr = per_cpu(processors, policy->cpu); + unsigned int cpu; - if (pr) - freq_qos_remove_request(&pr->perflib_req); + for_each_cpu(cpu, policy->related_cpus) { + struct acpi_processor *pr = per_cpu(processors, cpu); + + if (pr) + freq_qos_remove_request(&pr->perflib_req); + } } static int acpi_processor_get_performance_control(struct acpi_processor *pr) diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index c77a5b1fb107..41feb88ee92d 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -127,26 +127,34 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state) void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy) { - int cpu = 
policy->cpu; - struct acpi_processor *pr = per_cpu(processors, cpu); - int ret; + unsigned int cpu; - if (!pr) - return; + for_each_cpu(cpu, policy->related_cpus) { + struct acpi_processor *pr = per_cpu(processors, cpu); + int ret; + + if (!pr) + continue; - ret = freq_qos_add_request(&policy->constraints, &pr->thermal_req, - FREQ_QOS_MAX, INT_MAX); - if (ret < 0) - pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu, - ret); + ret = freq_qos_add_request(&policy->constraints, + &pr->thermal_req, + FREQ_QOS_MAX, INT_MAX); + if (ret < 0) + pr_err("Failed to add freq constraint for CPU%d (%d)\n", + cpu, ret); + } } void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy) { - struct acpi_processor *pr = per_cpu(processors, policy->cpu); + unsigned int cpu; + + for_each_cpu(cpu, policy->related_cpus) { + struct acpi_processor *pr = per_cpu(processors, policy->cpu); - if (pr) - freq_qos_remove_request(&pr->thermal_req); + if (pr) + freq_qos_remove_request(&pr->thermal_req); + } } #else /* ! CONFIG_CPU_FREQ */ static int cpufreq_get_max_state(unsigned int cpu) -- cgit v1.2.3 From 67d33aecd030226f0a577eb683aaa6853ecf8f91 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 24 Oct 2019 16:34:41 +0300 Subject: pinctrl: cherryview: Allocate IRQ chip dynamic Keeping the IRQ chip definition static shares it with multiple instances of the GPIO chip in the system. This is bad and now we get this warning from GPIO library: "detected irqchip that is shared with multiple gpiochips: please fix the driver." Hence, move the IRQ chip definition from being driver static into the struct intel_pinctrl. So a unique IRQ chip is used for each GPIO chip instance. This patch is heavily based on the attachment to the bug by Christoph Marz. 
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=202543 Fixes: 6e08d6bbebeb ("pinctrl: Add Intel Cherryview/Braswell pin controller support") Depends-on: 83b9dc11312f ("pinctrl: cherryview: Associate IRQ descriptors to irqdomain") Signed-off-by: Andy Shevchenko Signed-off-by: Mika Westerberg --- drivers/pinctrl/intel/pinctrl-cherryview.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index c31266e70559..2c419fa5d1c1 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -147,6 +147,7 @@ struct chv_pin_context { * @pctldesc: Pin controller description * @pctldev: Pointer to the pin controller device * @chip: GPIO chip in this pin controller + * @irqchip: IRQ chip in this pin controller * @regs: MMIO registers * @intr_lines: Stores mapping between 16 HW interrupt wires and GPIO * offset (in GPIO number space) @@ -162,6 +163,7 @@ struct chv_pinctrl { struct pinctrl_desc pctldesc; struct pinctrl_dev *pctldev; struct gpio_chip chip; + struct irq_chip irqchip; void __iomem *regs; unsigned intr_lines[16]; const struct chv_community *community; @@ -1466,16 +1468,6 @@ static int chv_gpio_irq_type(struct irq_data *d, unsigned int type) return 0; } -static struct irq_chip chv_gpio_irqchip = { - .name = "chv-gpio", - .irq_startup = chv_gpio_irq_startup, - .irq_ack = chv_gpio_irq_ack, - .irq_mask = chv_gpio_irq_mask, - .irq_unmask = chv_gpio_irq_unmask, - .irq_set_type = chv_gpio_irq_type, - .flags = IRQCHIP_SKIP_SET_WAKE, -}; - static void chv_gpio_irq_handler(struct irq_desc *desc) { struct gpio_chip *gc = irq_desc_get_handler_data(desc); @@ -1625,7 +1617,15 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) } } - ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, 0, + pctrl->irqchip.name = "chv-gpio"; + pctrl->irqchip.irq_startup = chv_gpio_irq_startup; + 
pctrl->irqchip.irq_ack = chv_gpio_irq_ack; + pctrl->irqchip.irq_mask = chv_gpio_irq_mask; + pctrl->irqchip.irq_unmask = chv_gpio_irq_unmask; + pctrl->irqchip.irq_set_type = chv_gpio_irq_type; + pctrl->irqchip.flags = IRQCHIP_SKIP_SET_WAKE; + + ret = gpiochip_irqchip_add(chip, &pctrl->irqchip, 0, handle_bad_irq, IRQ_TYPE_NONE); if (ret) { dev_err(pctrl->dev, "failed to add IRQ chip\n"); @@ -1642,7 +1642,7 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) } } - gpiochip_set_chained_irqchip(chip, &chv_gpio_irqchip, irq, + gpiochip_set_chained_irqchip(chip, &pctrl->irqchip, irq, chv_gpio_irq_handler); return 0; } -- cgit v1.2.3 From a8a30219ba78b1abb92091102b632f8e9bbdbf03 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 16 Oct 2019 18:28:33 +0200 Subject: powerpc/powernv/eeh: Fix oops when probing cxl devices Recent cleanup in the way EEH support is added to a device causes a kernel oops when the cxl driver probes a device and creates virtual devices discovered on the FPGA: BUG: Kernel NULL pointer dereference at 0x000000a0 Faulting instruction address: 0xc000000000048070 Oops: Kernel access of bad area, sig: 7 [#1] ... NIP eeh_add_device_late.part.9+0x50/0x1e0 LR eeh_add_device_late.part.9+0x3c/0x1e0 Call Trace: _dev_info+0x5c/0x6c (unreliable) pnv_pcibios_bus_add_device+0x60/0xb0 pcibios_bus_add_device+0x40/0x60 pci_bus_add_device+0x30/0x100 pci_bus_add_devices+0x64/0xd0 cxl_pci_vphb_add+0xe0/0x130 [cxl] cxl_probe+0x504/0x5b0 [cxl] local_pci_probe+0x6c/0x110 work_for_cpu_fn+0x38/0x60 The root cause is that those cxl virtual devices don't have a representation in the device tree and therefore no associated pci_dn structure. In eeh_add_device_late(), pdn is NULL, so edev is NULL and we oops. We never had explicit support for EEH for those virtual devices. Instead, EEH events are reported to the (real) pci device and handled by the cxl driver. Which can then forward to the virtual devices and handle dependencies. 
The fact that we try adding EEH support for the virtual devices is new and a side-effect of the recent cleanup. This patch fixes it by skipping adding EEH support on powernv for devices which don't have a pci_dn structure. The cxl driver doesn't create virtual devices on pseries so this patch doesn't fix it there intentionally. Fixes: b905f8cdca77 ("powerpc/eeh: EEH for pSeries hot plug") Signed-off-by: Frederic Barrat Reviewed-by: Sam Bobroff Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191016162833.22509-1-fbarrat@linux.ibm.com --- arch/powerpc/platforms/powernv/eeh-powernv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 6bc24a47e9ef..6f300ab7f0e9 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -42,7 +42,7 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev) { struct pci_dn *pdn = pci_get_pdn(pdev); - if (eeh_has_flag(EEH_FORCE_DISABLED)) + if (!pdn || eeh_has_flag(EEH_FORCE_DISABLED)) return; dev_dbg(&pdev->dev, "EEH: Setting up device\n"); -- cgit v1.2.3 From 9121923c457d1d8667a6e3a67302c29e5c5add6b Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 24 Oct 2019 16:03:26 -0700 Subject: kvm: Allocate memslots and buses before calling kvm_arch_init_vm This reorganization will allow us to call kvm_arch_destroy_vm in the event that kvm_create_vm fails after calling kvm_arch_init_vm. 
Suggested-by: Junaid Shahid Signed-off-by: Jim Mattson Reviewed-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 67ef3f2e19e8..ec14dae2f538 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -627,8 +627,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) static struct kvm *kvm_create_vm(unsigned long type) { - int r, i; struct kvm *kvm = kvm_arch_alloc_vm(); + int r = -ENOMEM; + int i; if (!kvm) return ERR_PTR(-ENOMEM); @@ -643,6 +644,25 @@ static struct kvm *kvm_create_vm(unsigned long type) refcount_set(&kvm->users_count, 1); INIT_LIST_HEAD(&kvm->devices); + BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); + + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { + struct kvm_memslots *slots = kvm_alloc_memslots(); + + if (!slots) + goto out_err_no_disable; + /* Generations must be different for each address space. */ + slots->generation = i; + rcu_assign_pointer(kvm->memslots[i], slots); + } + + for (i = 0; i < KVM_NR_BUSES; i++) { + rcu_assign_pointer(kvm->buses[i], + kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT)); + if (!kvm->buses[i]) + goto out_err_no_disable; + } + r = kvm_arch_init_vm(kvm, type); if (r) goto out_err_no_disable; @@ -655,28 +675,10 @@ static struct kvm *kvm_create_vm(unsigned long type) INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); #endif - BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); - - r = -ENOMEM; - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { - struct kvm_memslots *slots = kvm_alloc_memslots(); - if (!slots) - goto out_err_no_srcu; - /* Generations must be different for each address space. 
*/ - slots->generation = i; - rcu_assign_pointer(kvm->memslots[i], slots); - } - if (init_srcu_struct(&kvm->srcu)) goto out_err_no_srcu; if (init_srcu_struct(&kvm->irq_srcu)) goto out_err_no_irq_srcu; - for (i = 0; i < KVM_NR_BUSES; i++) { - rcu_assign_pointer(kvm->buses[i], - kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT)); - if (!kvm->buses[i]) - goto out_err; - } r = kvm_init_mmu_notifier(kvm); if (r) -- cgit v1.2.3 From c17add7a1c61a15578e4071ed7bfd460fd041c43 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 28 Aug 2019 10:33:12 +0800 Subject: btrfs: Consider system chunk array size for new SYSTEM chunks For SYSTEM chunks, despite the regular chunk item size limit, there is another limit due to system chunk array size. The extra limit was removed in a refactoring, so add it back. Fixes: e3ecdb3fdecf ("btrfs: factor out devs_max setting in __btrfs_alloc_chunk") CC: stable@vger.kernel.org # 5.3+ Reviewed-by: Nikolay Borisov Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bdfe4493e43a..e04409f85063 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4967,6 +4967,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { max_stripe_size = SZ_32M; max_chunk_size = 2 * max_stripe_size; + devs_max = min_t(int, devs_max, BTRFS_MAX_DEVS_SYS_CHUNK); } else { btrfs_err(info, "invalid chunk type 0x%llx requested", type); -- cgit v1.2.3 From 8bb177d18f114358a57d8ae7e206861b48b8b4de Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 28 Aug 2019 10:33:13 +0800 Subject: btrfs: tree-checker: Fix wrong check on max devid [BUG] The following script will cause false alert on devid check. 
#!/bin/bash dev1=/dev/test/test dev2=/dev/test/scratch1 mnt=/mnt/btrfs umount $dev1 &> /dev/null umount $dev2 &> /dev/null umount $mnt &> /dev/null mkfs.btrfs -f $dev1 mount $dev1 $mnt _fail() { echo "!!! FAILED !!!" exit 1 } for ((i = 0; i < 4096; i++)); do btrfs dev add -f $dev2 $mnt || _fail btrfs dev del $dev1 $mnt || _fail dev_tmp=$dev1 dev1=$dev2 dev2=$dev_tmp done [CAUSE] Tree-checker uses BTRFS_MAX_DEVS() and BTRFS_MAX_DEVS_SYS_CHUNK() as upper limit for devid. But we can have devid holes just like above script. So the check for devid is incorrect and could cause false alert. [FIX] Just remove the whole devid check. We don't have any hard requirement for devid assignment. Furthermore, even devid could get corrupted by a bitflip, we still have dev extents verification at mount time, so corrupted data won't sneak in. This fixes fstests btrfs/194. Reported-by: Anand Jain Fixes: ab4ba2e13346 ("btrfs: tree-checker: Verify dev item") CC: stable@vger.kernel.org # 5.2+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 43e488f5d063..076d5b8014fb 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -686,9 +686,7 @@ static void dev_item_err(const struct extent_buffer *eb, int slot, static int check_dev_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { - struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_dev_item *ditem; - u64 max_devid = max(BTRFS_MAX_DEVS(fs_info), BTRFS_MAX_DEVS_SYS_CHUNK); if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) { dev_item_err(leaf, slot, @@ -696,12 +694,6 @@ static int check_dev_item(struct extent_buffer *leaf, key->objectid, BTRFS_DEV_ITEMS_OBJECTID); return -EUCLEAN; } - if (key->offset > max_devid) { - dev_item_err(leaf, slot, - "invalid devid: has=%llu expect=[0, %llu]", - key->offset, max_devid); - return -EUCLEAN; - } ditem 
= btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); if (btrfs_device_id(leaf, ditem) != key->offset) { dev_item_err(leaf, slot, -- cgit v1.2.3 From 0cab7acc4afc0a4b20fd01a9a28971774501db80 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 25 Oct 2019 10:53:41 +0100 Subject: Btrfs: fix race leading to metadata space leak after task received signal When a task that is allocating metadata needs to wait for the async reclaim job to process its ticket and gets a signal (because it was killed for example) before doing the wait, the task ends up erroring out but with space reserved for its ticket, which never gets released, resulting in a metadata space leak (more specifically a leak in the bytes_may_use counter of the metadata space_info object). Here's the sequence of steps leading to the space leak: 1) A task tries to create a file for example, so it ends up trying to start a transaction at btrfs_create(); 2) The filesystem is currently in a state where there is not enough metadata free space to satisfy the transaction's needs. So at space-info.c:__reserve_metadata_bytes() we create a ticket and add it to the list of tickets of the space info object. Also, because the metadata async reclaim job is not running, we queue a job to run metadata reclaim; 3) In the meanwhile the task receives a signal (like SIGTERM from a kill command for example); 4) After queuing the async reclaim job, at __reserve_metadata_bytes(), we unlock the metadata space info and call handle_reserve_ticket(); 5) That last function calls wait_reserve_ticket(), which acquires the lock from the metadata space info. Then in the first iteration of its while loop, it calls prepare_to_wait_event(), which returns -ERESTARTSYS because the task has a pending signal. As a result, we set the error field of the ticket to -EINTR and exit the while loop without deleting the ticket from the list of tickets (in the space info object). 
After exiting the loop we unlock the space info; 6) The async reclaim job is able to release enough metadata, acquires the metadata space info's lock and then reserves space for the ticket, since the ticket is still in the list of (non-priority) tickets. The space reservation happens at btrfs_try_granting_tickets(), called from maybe_fail_all_tickets(). This increments the bytes_may_use counter from the metadata space info object, sets the ticket's bytes field to zero (meaning success, that space was reserved) and removes it from the list of tickets; 7) wait_reserve_ticket() returns, with the error field of the ticket set to -EINTR. Then handle_reserve_ticket() just propagates that error to the caller. Because an error was returned, the caller does not release the reserved space, since the expectation is that any error means no space was reserved. Fix this by removing the ticket from the list, while holding the space info lock, at wait_reserve_ticket() when prepare_to_wait_event() returns an error. Also add some comments and an assertion to guarantee we never end up with a ticket that has an error set and a bytes counter field set to zero, to more easily detect regressions in the future. This issue could be triggered sporadically by some test cases from fstests such as generic/269 for example, which tries to fill a filesystem and then kills fsstress processes running in the background. When this issue happens, we get a warning in syslog/dmesg when unmounting the filesystem, like the following: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 13240 at fs/btrfs/block-group.c:3186 btrfs_free_block_groups+0x314/0x470 [btrfs] (...) CPU: 0 PID: 13240 Comm: umount Tainted: G W L 5.3.0-rc8-btrfs-next-48+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_free_block_groups+0x314/0x470 [btrfs] (...) 
RSP: 0018:ffff9910c14cfdb8 EFLAGS: 00010286 RAX: 0000000000000024 RBX: ffff89cd8a4d55f0 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff89cdf6a178a8 RDI: ffff89cdf6a178a8 RBP: ffff9910c14cfde8 R08: 0000000000000000 R09: 0000000000000001 R10: ffff89cd4d618040 R11: 0000000000000000 R12: ffff89cd8a4d5508 R13: ffff89cde7c4a600 R14: dead000000000122 R15: dead000000000100 FS: 00007f42754432c0(0000) GS:ffff89cdf6a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fd25a47f730 CR3: 000000021f8d6006 CR4: 00000000003606f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: close_ctree+0x1ad/0x390 [btrfs] generic_shutdown_super+0x6c/0x110 kill_anon_super+0xe/0x30 btrfs_kill_super+0x12/0xa0 [btrfs] deactivate_locked_super+0x3a/0x70 cleanup_mnt+0xb4/0x160 task_work_run+0x7e/0xc0 exit_to_usermode_loop+0xfa/0x100 do_syscall_64+0x1cb/0x220 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f4274d2cb37 (...) 
RSP: 002b:00007ffcff701d38 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 0000557ebde2f060 RCX: 00007f4274d2cb37 RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000557ebde2f240 RBP: 0000557ebde2f240 R08: 0000557ebde2f270 R09: 0000000000000015 R10: 00000000000006b4 R11: 0000000000000246 R12: 00007f427522ee64 R13: 0000000000000000 R14: 0000000000000000 R15: 00007ffcff701fc0 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x75e/0x1fd0 softirqs last enabled at (0): [] copy_process+0x75e/0x1fd0 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace bcf4b235461b26f6 ]--- BTRFS info (device sdb): space_info 4 has 19116032 free, is full BTRFS info (device sdb): space_info total=33554432, used=14176256, pinned=0, reserved=0, may_use=196608, readonly=65536 BTRFS info (device sdb): global_block_rsv: size 0 reserved 0 BTRFS info (device sdb): trans_block_rsv: size 0 reserved 0 BTRFS info (device sdb): chunk_block_rsv: size 0 reserved 0 BTRFS info (device sdb): delayed_block_rsv: size 0 reserved 0 BTRFS info (device sdb): delayed_refs_rsv: size 0 reserved 0 Fixes: 374bf9c5cd7d0b ("btrfs: unify error handling for ticket flushing") Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/space-info.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 98dc092a905e..e8a4b0ebe97f 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -893,6 +893,15 @@ static void wait_reserve_ticket(struct btrfs_fs_info *fs_info, while (ticket->bytes > 0 && ticket->error == 0) { ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE); if (ret) { + /* + * Delete us from the list. After we unlock the space + * info, we don't want the async reclaim job to reserve + * space for this ticket. 
If that would happen, then the + * ticket's task would not known that space was reserved + * despite getting an error, resulting in a space leak + * (bytes_may_use counter of our space_info). + */ + list_del_init(&ticket->list); ticket->error = -EINTR; break; } @@ -945,12 +954,24 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info, spin_lock(&space_info->lock); ret = ticket->error; if (ticket->bytes || ticket->error) { + /* + * Need to delete here for priority tickets. For regular tickets + * either the async reclaim job deletes the ticket from the list + * or we delete it ourselves at wait_reserve_ticket(). + */ list_del_init(&ticket->list); if (!ret) ret = -ENOSPC; } spin_unlock(&space_info->lock); ASSERT(list_empty(&ticket->list)); + /* + * Check that we can't have an error set if the reservation succeeded, + * as that would confuse tasks and lead them to error out without + * releasing reserved space (if an error happens the expectation is that + * space wasn't reserved at all). + */ + ASSERT(!(ticket->bytes == 0 && ticket->error)); return ret; } -- cgit v1.2.3 From fa784f2ac00e19edc0d6eb77ac791bc1eb366d7e Mon Sep 17 00:00:00 2001 From: Vincent Prince Date: Tue, 22 Oct 2019 17:09:50 +0200 Subject: net: sch_generic: Use pfifo_fast as fallback scheduler for CAN hardware There is networking hardware that isn't based on Ethernet for layers 1 and 2. For example CAN. CAN is a multi-master serial bus standard for connecting Electronic Control Units [ECUs] also known as nodes. A frame on the CAN bus carries up to 8 bytes of payload. Frame corruption is detected by a CRC. However frame loss due to corruption is possible, but a quite unusual phenomenon. While fq_codel works great for TCP/IP, it doesn't for CAN. There are a lot of legacy protocols on top of CAN, which are not build with flow control or high CAN frame drop rates in mind. 
When using fq_codel, as soon as the queue reaches a certain delay based length, skbs from the head of the queue are silently dropped. Silently meaning that the user space using a send() or similar syscall doesn't get an error. However TCP's flow control algorithm will detect dropped packages and adjust the bandwidth accordingly. When using fq_codel and sending raw frames over CAN, which is the common use case, the user space thinks the package has been sent without problems, because send() returned without an error. pfifo_fast will drop skbs, if the queue length exceeds the maximum. But with this scheduler the skbs at the tail are dropped, an error (-ENOBUFS) is propagated to user space. So that the user space can slow down the package generation. On distributions, where fq_codel is made default via CONFIG_DEFAULT_NET_SCH during compile time, or set default during runtime with sysctl net.core.default_qdisc (see [1]), we get a bad user experience. In my test case with pfifo_fast, I can transfer thousands of million CAN frames without a frame drop. On the other hand with fq_codel there is more than one lost CAN frame per thousand frames. As pointed out fq_codel is not suited for CAN hardware, so this patch changes attach_one_default_qdisc() to use pfifo_fast for "ARPHRD_CAN" network devices. During transition of a netdev from down to up state the default queuing discipline is attached by attach_default_qdiscs() with the help of attach_one_default_qdisc(). This patch modifies attach_one_default_qdisc() to attach the pfifo_fast (pfifo_fast_ops) if the network device type is "ARPHRD_CAN". [1] https://github.com/systemd/systemd/issues/9194 Signed-off-by: Vincent Prince Signed-off-by: David S. 
Miller --- net/sched/sch_generic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b2d34c49cbe6..8769b4b8807d 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1038,6 +1038,8 @@ static void attach_one_default_qdisc(struct net_device *dev, if (dev->priv_flags & IFF_NO_QUEUE) ops = &noqueue_qdisc_ops; + else if(dev->type == ARPHRD_CAN) + ops = &pfifo_fast_ops; qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL); if (!qdisc) { -- cgit v1.2.3 From 3f6b2c4420610cf0882b395338c0daee15dc102d Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Wed, 23 Oct 2019 11:01:08 +0100 Subject: net: mvneta: make stub functions static inline If CONFIG_MVNETA_BM is not set, then make the stub functions static inline to avoid trying to export them, and remove the following sparse warnings: drivers/net/ethernet/marvell/mvneta_bm.h:163:6: warning: symbol 'mvneta_bm_pool_destroy' was not declared. Should it be static? drivers/net/ethernet/marvell/mvneta_bm.h:165:6: warning: symbol 'mvneta_bm_bufs_free' was not declared. Should it be static? drivers/net/ethernet/marvell/mvneta_bm.h:167:5: warning: symbol 'mvneta_bm_construct' was not declared. Should it be static? drivers/net/ethernet/marvell/mvneta_bm.h:168:5: warning: symbol 'mvneta_bm_pool_refill' was not declared. Should it be static? drivers/net/ethernet/marvell/mvneta_bm.h:170:23: warning: symbol 'mvneta_bm_pool_use' was not declared. Should it be static? drivers/net/ethernet/marvell/mvneta_bm.h:181:18: warning: symbol 'mvneta_bm_get' was not declared. Should it be static? drivers/net/ethernet/marvell/mvneta_bm.h:182:6: warning: symbol 'mvneta_bm_put' was not declared. Should it be static? Signed-off-by: Ben Dooks (Codethink) Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/mvneta_bm.h | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta_bm.h b/drivers/net/ethernet/marvell/mvneta_bm.h index c8425d35c049..e47783ce77e0 100644 --- a/drivers/net/ethernet/marvell/mvneta_bm.h +++ b/drivers/net/ethernet/marvell/mvneta_bm.h @@ -160,16 +160,23 @@ static inline u32 mvneta_bm_pool_get_bp(struct mvneta_bm *priv, (bm_pool->id << MVNETA_BM_POOL_ACCESS_OFFS)); } #else -void mvneta_bm_pool_destroy(struct mvneta_bm *priv, - struct mvneta_bm_pool *bm_pool, u8 port_map) {} -void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool, - u8 port_map) {} -int mvneta_bm_construct(struct hwbm_pool *hwbm_pool, void *buf) { return 0; } -int mvneta_bm_pool_refill(struct mvneta_bm *priv, - struct mvneta_bm_pool *bm_pool) {return 0; } -struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id, - enum mvneta_bm_type type, u8 port_id, - int pkt_size) { return NULL; } +static inline void mvneta_bm_pool_destroy(struct mvneta_bm *priv, + struct mvneta_bm_pool *bm_pool, + u8 port_map) {} +static inline void mvneta_bm_bufs_free(struct mvneta_bm *priv, + struct mvneta_bm_pool *bm_pool, + u8 port_map) {} +static inline int mvneta_bm_construct(struct hwbm_pool *hwbm_pool, void *buf) +{ return 0; } +static inline int mvneta_bm_pool_refill(struct mvneta_bm *priv, + struct mvneta_bm_pool *bm_pool) +{ return 0; } +static inline struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, + u8 pool_id, + enum mvneta_bm_type type, + u8 port_id, + int pkt_size) +{ return NULL; } static inline void mvneta_bm_pool_put_bp(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool, @@ -178,7 +185,8 @@ static inline void mvneta_bm_pool_put_bp(struct mvneta_bm *priv, static inline u32 mvneta_bm_pool_get_bp(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool) { return 0; } -struct mvneta_bm *mvneta_bm_get(struct 
device_node *node) { return NULL; } -void mvneta_bm_put(struct mvneta_bm *priv) {} +static inline struct mvneta_bm *mvneta_bm_get(struct device_node *node) +{ return NULL; } +static inline void mvneta_bm_put(struct mvneta_bm *priv) {} #endif /* CONFIG_MVNETA_BM */ #endif -- cgit v1.2.3 From 91e2e57636f163837e1aea2ce6c4995b8a2a6f10 Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Wed, 23 Oct 2019 11:01:39 +0100 Subject: net: hwbm: if CONFIG_NET_HWBM unset, make stub functions static If CONFIG_NET_HWBM is not set, then these stub functions in include/net/hwbm.h should be declared static to avoid trying to export them from any driver that includes this. Fixes the following sparse warnings: ./include/net/hwbm.h:24:6: warning: symbol 'hwbm_buf_free' was not declared. Should it be static? ./include/net/hwbm.h:25:5: warning: symbol 'hwbm_pool_refill' was not declared. Should it be static? ./include/net/hwbm.h:26:5: warning: symbol 'hwbm_pool_add' was not declared. Should it be static? Signed-off-by: Ben Dooks (Codethink) Signed-off-by: David S. 
Miller --- include/net/hwbm.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/hwbm.h b/include/net/hwbm.h index 81643cf8a1c4..c81444611a22 100644 --- a/include/net/hwbm.h +++ b/include/net/hwbm.h @@ -21,9 +21,13 @@ void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf); int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp); int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num); #else -void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf) {} -int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp) { return 0; } -int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num) +static inline void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf) {} + +static inline int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp) +{ return 0; } + +static inline int hwbm_pool_add(struct hwbm_pool *bm_pool, + unsigned int buf_num) { return 0; } #endif /* CONFIG_HWBM */ #endif /* _HWBM_H */ -- cgit v1.2.3 From f536dffc0b79738c3104af999318279dccbaa261 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 23 Oct 2019 15:44:05 +0200 Subject: net/smc: fix closing of fallback SMC sockets For SMC sockets forced to fallback to TCP, the file is propagated from the outer SMC to the internal TCP socket. When closing the SMC socket, the internal TCP socket file pointer must be restored to the original NULL value, otherwise memory leaks may show up (found with CONFIG_DEBUG_KMEMLEAK). The internal TCP socket is released in smc_clcsock_release(), which calls __sock_release() function in net/socket.c. This calls the needed iput(SOCK_INODE(sock)) only, if the file pointer has been reset to the original NULL-value. Fixes: 07603b230895 ("net/smc: propagate file from SMC to TCP socket") Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. 
Miller --- net/smc/af_smc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 5b932583e407..d9566e84f2f9 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -123,6 +123,12 @@ struct proto smc_proto6 = { }; EXPORT_SYMBOL_GPL(smc_proto6); +static void smc_restore_fallback_changes(struct smc_sock *smc) +{ + smc->clcsock->file->private_data = smc->sk.sk_socket; + smc->clcsock->file = NULL; +} + static int __smc_release(struct smc_sock *smc) { struct sock *sk = &smc->sk; @@ -141,6 +147,7 @@ static int __smc_release(struct smc_sock *smc) } sk->sk_state = SMC_CLOSED; sk->sk_state_change(sk); + smc_restore_fallback_changes(smc); } sk->sk_prot->unhash(sk); -- cgit v1.2.3 From ca5f8d2dd5229ccacdd5cfde1ce4d32b0810e454 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 23 Oct 2019 15:44:06 +0200 Subject: net/smc: keep vlan_id for SMC-R in smc_listen_work() Creating of an SMC-R connection with vlan-id fails, because smc_listen_work() determines the vlan_id of the connection, saves it in struct smc_init_info ini, but clears the ini area again if SMC-D is not applicable. This patch just resets the ISM device before investigating SMC-R availability. Fixes: bc36d2fc93eb ("net/smc: consolidate function parameters") Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. 
Miller --- net/smc/af_smc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index d9566e84f2f9..cea3c36ea0da 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1298,8 +1298,8 @@ static void smc_listen_work(struct work_struct *work) /* check if RDMA is available */ if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */ /* prepare RDMA check */ - memset(&ini, 0, sizeof(ini)); ini.is_smcd = false; + ini.ism_dev = NULL; ini.ib_lcl = &pclc->lcl; rc = smc_find_rdma_device(new_smc, &ini); if (rc) { -- cgit v1.2.3 From 16d65287927e06b5c6c522d8d479def36c19844b Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Wed, 23 Oct 2019 20:56:38 +0530 Subject: net: ethernet: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in header file related to ethernet driver for Cortina Gemini devices. For C header files Documentation/process/license-rules.rst mandates C-like comments (opposed to C source files where C++ style should be used) Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46. Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Acked-by: Linus Walleij Signed-off-by: David S. Miller --- drivers/net/ethernet/cortina/gemini.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cortina/gemini.h b/drivers/net/ethernet/cortina/gemini.h index 0b12f89bf89a..9fdf77d5eb37 100644 --- a/drivers/net/ethernet/cortina/gemini.h +++ b/drivers/net/ethernet/cortina/gemini.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* Register definitions for Gemini GMAC Ethernet device driver * * Copyright (C) 2006 Storlink, Corp. 
-- cgit v1.2.3 From d4e4fdf9e4a27c87edb79b1478955075be141f67 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 23 Oct 2019 18:39:04 +0200 Subject: netns: fix GFP flags in rtnl_net_notifyid() In rtnl_net_notifyid(), we certainly can't pass a null GFP flag to rtnl_notify(). A GFP_KERNEL flag would be fine in most circumstances, but there are a few paths calling rtnl_net_notifyid() from atomic context or from RCU critical sections. The latter also precludes the use of gfp_any() as it wouldn't detect the RCU case. Also, the nlmsg_new() call is wrong too, as it uses GFP_KERNEL unconditionally. Therefore, we need to pass the GFP flags as parameter and propagate it through function calls until the proper flags can be determined. In most cases, GFP_KERNEL is fine. The exceptions are: * openvswitch: ovs_vport_cmd_get() and ovs_vport_cmd_dump() indirectly call rtnl_net_notifyid() from RCU critical section, * rtnetlink: rtmsg_ifinfo_build_skb() already receives GFP flags as parameter. Also, in ovs_vport_cmd_build_info(), let's change the GFP flags used by nlmsg_new(). The function is allowed to sleep, so better make the flags consistent with the ones used in the following ovs_vport_cmd_fill_info() call. Found by code inspection. Fixes: 9a9634545c70 ("netns: notify netns id events") Signed-off-by: Guillaume Nault Acked-by: Nicolas Dichtel Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- include/net/net_namespace.h | 2 +- net/core/dev.c | 2 +- net/core/net_namespace.c | 17 +++++++++-------- net/core/rtnetlink.c | 14 +++++++------- net/openvswitch/datapath.c | 20 +++++++++++--------- 5 files changed, 29 insertions(+), 26 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 4c2cd9378699..c7e15a213ef2 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -342,7 +342,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #define __net_initconst __initconst #endif -int peernet2id_alloc(struct net *net, struct net *peer); +int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp); int peernet2id(struct net *net, struct net *peer); bool peernet_has_id(struct net *net, struct net *peer); struct net *get_net_ns_by_id(struct net *net, int id); diff --git a/net/core/dev.c b/net/core/dev.c index 1482e2ef2d25..96afd464284a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9770,7 +9770,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char call_netdevice_notifiers(NETDEV_UNREGISTER, dev); rcu_barrier(); - new_nsid = peernet2id_alloc(dev_net(dev), net); + new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL); /* If there is an ifindex conflict assign a new one */ if (__dev_get_by_index(net, dev->ifindex)) new_ifindex = dev_new_index(net); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 5a4ae0845bac..39402840025e 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -246,11 +246,11 @@ static int __peernet2id(struct net *net, struct net *peer) } static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, - struct nlmsghdr *nlh); + struct nlmsghdr *nlh, gfp_t gfp); /* This function returns the id of a peer netns. If no id is assigned, one will * be allocated and returned. 
*/ -int peernet2id_alloc(struct net *net, struct net *peer) +int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) { bool alloc = false, alive = false; int id; @@ -269,7 +269,7 @@ int peernet2id_alloc(struct net *net, struct net *peer) id = __peernet2id_alloc(net, peer, &alloc); spin_unlock_bh(&net->nsid_lock); if (alloc && id >= 0) - rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL); + rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp); if (alive) put_net(peer); return id; @@ -534,7 +534,8 @@ static void unhash_nsid(struct net *net, struct net *last) idr_remove(&tmp->netns_ids, id); spin_unlock_bh(&tmp->nsid_lock); if (id >= 0) - rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL); + rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL, + GFP_KERNEL); if (tmp == last) break; } @@ -767,7 +768,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, spin_unlock_bh(&net->nsid_lock); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid, - nlh); + nlh, GFP_KERNEL); err = 0; } else if (err == -ENOSPC && nsid >= 0) { err = -EEXIST; @@ -1055,7 +1056,7 @@ end: } static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, - struct nlmsghdr *nlh) + struct nlmsghdr *nlh, gfp_t gfp) { struct net_fill_args fillargs = { .portid = portid, @@ -1066,7 +1067,7 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, struct sk_buff *msg; int err = -ENOMEM; - msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); + msg = nlmsg_new(rtnl_net_get_size(), gfp); if (!msg) goto out; @@ -1074,7 +1075,7 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, if (err < 0) goto err_out; - rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, 0); + rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, gfp); return; err_out: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 13493aae4e6c..ba4b4048ec3e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1523,7 +1523,7 @@ static 
noinline_for_stack int nla_put_ifalias(struct sk_buff *skb, static int rtnl_fill_link_netnsid(struct sk_buff *skb, const struct net_device *dev, - struct net *src_net) + struct net *src_net, gfp_t gfp) { bool put_iflink = false; @@ -1531,7 +1531,7 @@ static int rtnl_fill_link_netnsid(struct sk_buff *skb, struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); if (!net_eq(dev_net(dev), link_net)) { - int id = peernet2id_alloc(src_net, link_net); + int id = peernet2id_alloc(src_net, link_net, gfp); if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) return -EMSGSIZE; @@ -1589,7 +1589,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask, u32 event, int *new_nsid, int new_ifindex, - int tgt_netnsid) + int tgt_netnsid, gfp_t gfp) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; @@ -1681,7 +1681,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; } - if (rtnl_fill_link_netnsid(skb, dev, src_net)) + if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp)) goto nla_put_failure; if (new_nsid && @@ -2001,7 +2001,7 @@ walk_entries: NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, 0, flags, ext_filter_mask, 0, NULL, 0, - netnsid); + netnsid, GFP_KERNEL); if (err < 0) { if (likely(skb->len)) @@ -3360,7 +3360,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, err = rtnl_fill_ifinfo(nskb, dev, net, RTM_NEWLINK, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 0, ext_filter_mask, - 0, NULL, 0, netnsid); + 0, NULL, 0, netnsid, GFP_KERNEL); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); @@ -3472,7 +3472,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, err = rtnl_fill_ifinfo(skb, dev, dev_net(dev), type, 0, 0, change, 0, 0, event, - new_nsid, new_ifindex, -1); + new_nsid, new_ifindex, -1, flags); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); diff --git 
a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f30e406fbec5..d8c364d637b1 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1881,7 +1881,7 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = { /* Called with ovs_mutex or RCU read lock. */ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, struct net *net, u32 portid, u32 seq, - u32 flags, u8 cmd) + u32 flags, u8 cmd, gfp_t gfp) { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; @@ -1902,7 +1902,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, goto nla_put_failure; if (!net_eq(net, dev_net(vport->dev))) { - int id = peernet2id_alloc(net, dev_net(vport->dev)); + int id = peernet2id_alloc(net, dev_net(vport->dev), gfp); if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id)) goto nla_put_failure; @@ -1943,11 +1943,12 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, struct sk_buff *skb; int retval; - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd); + retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd, + GFP_KERNEL); BUG_ON(retval < 0); return skb; @@ -2089,7 +2090,7 @@ restart: err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, - OVS_VPORT_CMD_NEW); + OVS_VPORT_CMD_NEW, GFP_KERNEL); new_headroom = netdev_get_fwd_headroom(vport->dev); @@ -2150,7 +2151,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, - OVS_VPORT_CMD_SET); + OVS_VPORT_CMD_SET, GFP_KERNEL); BUG_ON(err < 0); ovs_unlock(); @@ -2190,7 +2191,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) err = 
ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, - OVS_VPORT_CMD_DEL); + OVS_VPORT_CMD_DEL, GFP_KERNEL); BUG_ON(err < 0); /* the vport deletion may trigger dp headroom update */ @@ -2237,7 +2238,7 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) goto exit_unlock_free; err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, - OVS_VPORT_CMD_GET); + OVS_VPORT_CMD_GET, GFP_ATOMIC); BUG_ON(err < 0); rcu_read_unlock(); @@ -2273,7 +2274,8 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - OVS_VPORT_CMD_GET) < 0) + OVS_VPORT_CMD_GET, + GFP_ATOMIC) < 0) goto out; j++; -- cgit v1.2.3 From 7c3bebc3d8688b84795c11848c314a2fbfe045e0 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 23 Oct 2019 23:03:55 +0530 Subject: cxgb4: request the TX CIDX updates to status page For adapters which support the SGE Doorbell Queue Timer facility, we configured the Ethernet TX Queues to send CIDX Updates to the Associated Ethernet RX Response Queue with CPL_SGE_EGR_UPDATE messages to allow us to respond more quickly to the CIDX Updates. But, this was adding load to PCIe Link RX bandwidth and, potentially, resulting in higher CPU Interrupt load. This patch requests the HW to deliver the CIDX updates to the TX queue status page rather than generating an ingress queue message (as an interrupt). With this patch, the load on RX bandwidth is reduced and a substantial improvement in BW is noticed at lower IO sizes. Fixes: d429005fdf2c ("cxgb4/cxgb4vf: Add support for SGE doorbell queue timer") Signed-off-by: Raju Rangoju Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index b3da81e90132..928bfea5457b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -3791,15 +3791,11 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, * write the CIDX Updates into the Status Page at the end of the * TX Queue. */ - c.autoequiqe_to_viid = htonl((dbqt - ? FW_EQ_ETH_CMD_AUTOEQUIQE_F - : FW_EQ_ETH_CMD_AUTOEQUEQE_F) | + c.autoequiqe_to_viid = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F | FW_EQ_ETH_CMD_VIID_V(pi->viid)); c.fetchszm_to_iqid = - htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(dbqt - ? HOSTFCMODE_INGRESS_QUEUE_X - : HOSTFCMODE_STATUS_PAGE_X) | + htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) | FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) | FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid)); -- cgit v1.2.3 From f2bbdbcb075f3977a53da3bdcb7cd460bc8ae5f2 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sat, 26 Oct 2019 12:06:20 +0900 Subject: ALSA: bebob: Fix prototype of helper function to return negative value A helper function of ALSA bebob driver returns negative value in a function which has a prototype to return unsigned value. This commit fixes it by changing the prototype. 
Fixes: eb7b3a056cd8 ("ALSA: bebob: Add commands and connections/streams management") Cc: # v3.16+ Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20191026030620.12077-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/bebob/bebob_stream.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/firewire/bebob/bebob_stream.c b/sound/firewire/bebob/bebob_stream.c index 73fee991bd75..6c1497d9f52b 100644 --- a/sound/firewire/bebob/bebob_stream.c +++ b/sound/firewire/bebob/bebob_stream.c @@ -252,8 +252,7 @@ end: return err; } -static unsigned int -map_data_channels(struct snd_bebob *bebob, struct amdtp_stream *s) +static int map_data_channels(struct snd_bebob *bebob, struct amdtp_stream *s) { unsigned int sec, sections, ch, channels; unsigned int pcm, midi, location; -- cgit v1.2.3 From 7e5d0bf6afcc7bd72f78e7f33570e2e0945624f0 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 9 Oct 2019 17:43:43 -0300 Subject: ARM: dts: imx6qdl-sabreauto: Fix storm of accelerometer interrupts Since commit a211b8c55f3c ("ARM: dts: imx6qdl-sabreauto: Add sensors") a storm of accelerometer interrupts is seen: [ 114.211283] irq 260: nobody cared (try booting with the "irqpoll" option) [ 114.218108] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.3.4 #1 [ 114.223960] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) [ 114.230531] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 114.238301] [] (show_stack) from [] (dump_stack+0xd8/0x110) [ 114.245644] [] (dump_stack) from [] (__report_bad_irq+0x30/0xc0) [ 114.253417] [] (__report_bad_irq) from [] (note_interrupt+0x108/0x298) [ 114.261707] [] (note_interrupt) from [] (handle_irq_event_percpu+0x70/0x80) [ 114.270433] [] (handle_irq_event_percpu) from [] (handle_irq_event+0x38/0x5c) [ 114.279326] [] (handle_irq_event) from [] (handle_level_irq+0xc8/0x154) [ 114.287701] [] (handle_level_irq) from [] (generic_handle_irq+0x20/0x34) [ 114.296166] [] (generic_handle_irq) from [] 
(mxc_gpio_irq_handler+0x30/0xf0) [ 114.304975] [] (mxc_gpio_irq_handler) from [] (mx3_gpio_irq_handler+0x60/0xb0) [ 114.313955] [] (mx3_gpio_irq_handler) from [] (generic_handle_irq+0x20/0x34) [ 114.322762] [] (generic_handle_irq) from [] (__handle_domain_irq+0x64/0xe0) [ 114.331485] [] (__handle_domain_irq) from [] (gic_handle_irq+0x4c/0xa8) [ 114.339862] [] (gic_handle_irq) from [] (__irq_svc+0x70/0x98) [ 114.347361] Exception stack(0xc1301ec0 to 0xc1301f08) [ 114.352435] 1ec0: 00000001 00000006 00000000 c130c340 00000001 c130f688 9785636d c13ea2e8 [ 114.360635] 1ee0: 9784907d 0000001a eaf99d78 0000001a 00000000 c1301f10 c0182b00 c0878de4 [ 114.368830] 1f00: 20000013 ffffffff [ 114.372349] [] (__irq_svc) from [] (cpuidle_enter_state+0x168/0x5f4) [ 114.380464] [] (cpuidle_enter_state) from [] (cpuidle_enter+0x28/0x38) [ 114.388751] [] (cpuidle_enter) from [] (do_idle+0x224/0x2a8) [ 114.396168] [] (do_idle) from [] (cpu_startup_entry+0x18/0x20) [ 114.403765] [] (cpu_startup_entry) from [] (start_kernel+0x43c/0x500) [ 114.411958] handlers: [ 114.414302] [] irq_default_primary_handler threaded [] mma8452_interrupt [ 114.422974] Disabling IRQ #260 CPU0 CPU1 .... 260: 100001 0 gpio-mxc 31 Level mma8451 The MMA8451 interrupt triggers as low level, so the GPIO6_IO31 pin needs to activate its pull up, otherwise it will stay always at low level generating multiple interrupts. The current device tree does not configure the IOMUX for this pin, so it uses whatever comes configured from the bootloader. The IOMUXC_SW_PAD_CTL_PAD_EIM_BCLK register value comes as 0x8000 from the bootloader, which has PKE bit cleared, hence disabling the pull-up. Instead of relying on a previous configuration from the bootloader, configure the GPIO6_IO31 pin with pull-up enabled in order to fix this problem. 
Fixes: a211b8c55f3c ("ARM: dts: imx6qdl-sabreauto: Add sensors") Signed-off-by: Fabio Estevam Reviewed-By: Leonard Crestez Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-sabreauto.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi index f3404dd10537..cf628465cd0a 100644 --- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi @@ -230,6 +230,8 @@ accelerometer@1c { compatible = "fsl,mma8451"; reg = <0x1c>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_mma8451_int>; interrupt-parent = <&gpio6>; interrupts = <31 IRQ_TYPE_LEVEL_LOW>; }; @@ -628,6 +630,12 @@ >; }; + pinctrl_mma8451_int: mma8451intgrp { + fsl,pins = < + MX6QDL_PAD_EIM_BCLK__GPIO6_IO31 0xb0b1 + >; + }; + pinctrl_pwm3: pwm1grp { fsl,pins = < MX6QDL_PAD_SD4_DAT1__PWM3_OUT 0x1b0b1 -- cgit v1.2.3 From 96ed1044fa98ea9e164fc1e679cad61575bf4f32 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 15 Oct 2019 15:09:09 +0100 Subject: soc: imx: gpc: fix initialiser format Make the initialisers in imx_gpc_domains C99 format to fix the following sparse warnings: drivers/soc/imx/gpc.c:252:30: warning: obsolete array initializer, use C99 syntax drivers/soc/imx/gpc.c:258:29: warning: obsolete array initializer, use C99 syntax drivers/soc/imx/gpc.c:269:34: warning: obsolete array initializer, use C99 syntax drivers/soc/imx/gpc.c:278:30: warning: obsolete array initializer, use C99 syntax Signed-off-by: Ben Dooks Reviewed-by: Lucas Stach Fixes: b0682d485f12 ("soc: imx: gpc: use GPC_PGC_DOMAIN_* indexes") Signed-off-by: Shawn Guo --- drivers/soc/imx/gpc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c index d9231bd3c691..98b9d9a902ae 100644 --- a/drivers/soc/imx/gpc.c +++ b/drivers/soc/imx/gpc.c @@ -249,13 +249,13 @@ static struct genpd_power_state imx6_pm_domain_pu_state = { }; static struct imx_pm_domain 
imx_gpc_domains[] = { - [GPC_PGC_DOMAIN_ARM] { + [GPC_PGC_DOMAIN_ARM] = { .base = { .name = "ARM", .flags = GENPD_FLAG_ALWAYS_ON, }, }, - [GPC_PGC_DOMAIN_PU] { + [GPC_PGC_DOMAIN_PU] = { .base = { .name = "PU", .power_off = imx6_pm_domain_power_off, @@ -266,7 +266,7 @@ static struct imx_pm_domain imx_gpc_domains[] = { .reg_offs = 0x260, .cntr_pdn_bit = 0, }, - [GPC_PGC_DOMAIN_DISPLAY] { + [GPC_PGC_DOMAIN_DISPLAY] = { .base = { .name = "DISPLAY", .power_off = imx6_pm_domain_power_off, @@ -275,7 +275,7 @@ static struct imx_pm_domain imx_gpc_domains[] = { .reg_offs = 0x240, .cntr_pdn_bit = 4, }, - [GPC_PGC_DOMAIN_PCI] { + [GPC_PGC_DOMAIN_PCI] = { .base = { .name = "PCI", .power_off = imx6_pm_domain_power_off, -- cgit v1.2.3 From 5ff223e86f5addbfae26419cbb5d61d98f6fbf7d Mon Sep 17 00:00:00 2001 From: zhanglin Date: Sat, 26 Oct 2019 15:54:16 +0800 Subject: net: Zeroing the structure ethtool_wolinfo in ethtool_get_wol() memset() the structure ethtool_wolinfo that has padded bytes but the padded bytes have not been zeroed out. Signed-off-by: zhanglin Signed-off-by: David S. Miller --- net/core/ethtool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index c763106c73fc..cd9bc67381b2 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1396,11 +1396,13 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr) static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) { - struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; + struct ethtool_wolinfo wol; if (!dev->ethtool_ops->get_wol) return -EOPNOTSUPP; + memset(&wol, 0, sizeof(struct ethtool_wolinfo)); + wol.cmd = ETHTOOL_GWOL; dev->ethtool_ops->get_wol(dev, &wol); if (copy_to_user(useraddr, &wol, sizeof(wol))) -- cgit v1.2.3 From 0b834ba00ab5337e938c727e216e1f5249794717 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 26 Oct 2019 11:53:39 +0200 Subject: ipv4: fix route update on metric change. 
Since commit af4d768ad28c ("net/ipv4: Add support for specifying metric of connected routes"), when updating an IP address with a different metric, the associated connected route is updated, too. Still, the mentioned commit doesn't handle properly some corner cases: $ ip addr add dev eth0 192.168.1.0/24 $ ip addr add dev eth0 192.168.2.1/32 peer 192.168.2.2 $ ip addr add dev eth0 192.168.3.1/24 $ ip addr change dev eth0 192.168.1.0/24 metric 10 $ ip addr change dev eth0 192.168.2.1/32 peer 192.168.2.2 metric 10 $ ip addr change dev eth0 192.168.3.1/24 metric 10 $ ip -4 route 192.168.1.0/24 dev eth0 proto kernel scope link src 192.168.1.0 192.168.2.2 dev eth0 proto kernel scope link src 192.168.2.1 192.168.3.0/24 dev eth0 proto kernel scope link src 192.168.2.1 metric 10 Only the last route is correctly updated. The problem is the current test in fib_modify_prefix_metric(): if (!(dev->flags & IFF_UP) || ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) || ipv4_is_zeronet(prefix) || prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32) Which should be the logical 'not' of the pre-existing test in fib_add_ifaddr(): if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) && (prefix != addr || ifa->ifa_prefixlen < 32)) To properly negate the original expression, we need to change the last logical 'or' to a logical 'and'. Fixes: af4d768ad28c ("net/ipv4: Add support for specifying metric of connected routes") Reported-and-suggested-by: Beniamino Galvani Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv4/fib_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index dde77f72e03e..71c78d223dfd 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1148,7 +1148,7 @@ void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric) if (!(dev->flags & IFF_UP) || ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) || ipv4_is_zeronet(prefix) || - prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32) + (prefix == ifa->ifa_local && ifa->ifa_prefixlen == 32)) return; /* add the new */ -- cgit v1.2.3 From 37de3b354150450ba12275397155e68113e99901 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 26 Oct 2019 11:53:40 +0200 Subject: selftests: fib_tests: add more tests for metric update This patch adds two more tests to ipv4_addr_metric_test() to explicitly cover the scenarios fixed by the previous patch. Suggested-by: David Ahern Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_tests.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index c4ba0ff4a53f..76c1897e6352 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1438,6 +1438,27 @@ ipv4_addr_metric_test() fi log_test $rc 0 "Prefix route with metric on link up" + # explicitly check for metric changes on edge scenarios + run_cmd "$IP addr flush dev dummy2" + run_cmd "$IP addr add dev dummy2 172.16.104.0/24 metric 259" + run_cmd "$IP addr change dev dummy2 172.16.104.0/24 metric 260" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.0 metric 260" + rc=$? 
+ fi + log_test $rc 0 "Modify metric of .0/24 address" + + run_cmd "$IP addr flush dev dummy2" + run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260" + run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 261" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261" + rc=$? + fi + log_test $rc 0 "Modify metric of address with peer route" + $IP li del dummy1 $IP li del dummy2 cleanup -- cgit v1.2.3 From a51bab592fbbef10f0e42a8aed86adfbf6a68fa7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 21 Oct 2019 16:18:36 +0200 Subject: usb: dwc3: select CONFIG_REGMAP_MMIO After many randconfig builds, one configuration caused a link error with dwc3-meson-g12a lacking the regmap-mmio code: drivers/usb/dwc3/dwc3-meson-g12a.o: In function `dwc3_meson_g12a_probe': dwc3-meson-g12a.c:(.text+0x9f): undefined reference to `__devm_regmap_init_mmio_clk' Add the select statement that we have for all other users of that dependency. Fixes: c99993376f72 ("usb: dwc3: Add Amlogic G12A DWC3 glue") Acked-by: Neil Armstrong Signed-off-by: Arnd Bergmann Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig index 89abc6078703..556a876c7896 100644 --- a/drivers/usb/dwc3/Kconfig +++ b/drivers/usb/dwc3/Kconfig @@ -102,6 +102,7 @@ config USB_DWC3_MESON_G12A depends on ARCH_MESON || COMPILE_TEST default USB_DWC3 select USB_ROLE_SWITCH + select REGMAP_MMIO help Support USB2/3 functionality in Amlogic G12A platforms. Say 'Y' or 'M' if you have one such device. 
-- cgit v1.2.3 From bc1e3a2dd0c9954fd956ac43ca2876bbea018c01 Mon Sep 17 00:00:00 2001 From: Nikhil Badola Date: Mon, 21 Oct 2019 18:21:51 +0800 Subject: usb: fsl: Check memory resource before releasing it Check memory resource existence before releasing it to avoid NULL pointer dereference Signed-off-by: Nikhil Badola Reviewed-by: Ran Wang Reviewed-by: Peter Chen Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/fsl_udc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c index 20141c3096f6..9a05863b2876 100644 --- a/drivers/usb/gadget/udc/fsl_udc_core.c +++ b/drivers/usb/gadget/udc/fsl_udc_core.c @@ -2576,7 +2576,7 @@ static int fsl_udc_remove(struct platform_device *pdev) dma_pool_destroy(udc_controller->td_pool); free_irq(udc_controller->irq, udc_controller); iounmap(dr_regs); - if (pdata->operating_mode == FSL_USB2_DR_DEVICE) + if (res && (pdata->operating_mode == FSL_USB2_DR_DEVICE)) release_mem_region(res->start, resource_size(res)); /* free udc --wait for the release() finished */ -- cgit v1.2.3 From b26a4052cf9a93672f154976de14705fbf8a8179 Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Thu, 17 Oct 2019 18:27:17 +0100 Subject: usb: mtu3: fix missing include of mtu3_dr.h The declarations of ssusb_gadget_{init,exit} are in the mtu3_dr.h file but the code that implements them does not include it. Add the include to fix the following sparse warnings: drivers/usb/mtu3/mtu3_core.c:825:5: warning: symbol 'ssusb_gadget_init' was not declared. Should it be static? drivers/usb/mtu3/mtu3_core.c:925:6: warning: symbol 'ssusb_gadget_exit' was not declared. Should it be static?
Acked-by: Chunfeng Yun Signed-off-by: Ben Dooks Signed-off-by: Felipe Balbi --- drivers/usb/mtu3/mtu3_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/mtu3/mtu3_core.c b/drivers/usb/mtu3/mtu3_core.c index c3d5c1206eec..9dd02160cca9 100644 --- a/drivers/usb/mtu3/mtu3_core.c +++ b/drivers/usb/mtu3/mtu3_core.c @@ -16,6 +16,7 @@ #include #include "mtu3.h" +#include "mtu3_dr.h" #include "mtu3_debug.h" #include "mtu3_trace.h" -- cgit v1.2.3 From 5053691a7d62a4ad1566aa923e33f5d21a8016f0 Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Thu, 17 Oct 2019 13:44:27 +0100 Subject: usb: cdns3: include host-export.h for cdns3_host_init The cdns3_host_init() function is declared in host-export.h but host.c does not include it. Add the include to have the declaration present (and remove the declaration of cdns3_host_exit which is now static). Fixes the following sparse warning: drivers/usb/cdns3/host.c:58:5: warning: symbol 'cdns3_host_init' was not declared. Should it be static?
Signed-off-by: Ben Dooks Signed-off-by: Felipe Balbi --- drivers/usb/cdns3/host-export.h | 1 - drivers/usb/cdns3/host.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/host-export.h b/drivers/usb/cdns3/host-export.h index b498a170b7e8..ae11810f8826 100644 --- a/drivers/usb/cdns3/host-export.h +++ b/drivers/usb/cdns3/host-export.h @@ -12,7 +12,6 @@ #ifdef CONFIG_USB_CDNS3_HOST int cdns3_host_init(struct cdns3 *cdns); -void cdns3_host_exit(struct cdns3 *cdns); #else diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c index 2733a8f71fcd..ad788bf3fe4f 100644 --- a/drivers/usb/cdns3/host.c +++ b/drivers/usb/cdns3/host.c @@ -12,6 +12,7 @@ #include #include "core.h" #include "drd.h" +#include "host-export.h" static int __cdns3_host_init(struct cdns3 *cdns) { -- cgit v1.2.3 From 2457b2c1b44eeb022308bd524b34fa9b3ac3da11 Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Tue, 15 Oct 2019 16:50:44 +0100 Subject: usb: renesas_usbhs: fix __le16 warnings Fix the warnings generated by casting to/from __le16 without using the correct functions. 
Fixes the following sparse warnings: drivers/usb/renesas_usbhs/common.c:165:25: warning: incorrect type in assignment (different base types) drivers/usb/renesas_usbhs/common.c:165:25: expected restricted __le16 [usertype] wValue drivers/usb/renesas_usbhs/common.c:165:25: got unsigned short drivers/usb/renesas_usbhs/common.c:166:25: warning: incorrect type in assignment (different base types) drivers/usb/renesas_usbhs/common.c:166:25: expected restricted __le16 [usertype] wIndex drivers/usb/renesas_usbhs/common.c:166:25: got unsigned short drivers/usb/renesas_usbhs/common.c:167:25: warning: incorrect type in assignment (different base types) drivers/usb/renesas_usbhs/common.c:167:25: expected restricted __le16 [usertype] wLength drivers/usb/renesas_usbhs/common.c:167:25: got unsigned short drivers/usb/renesas_usbhs/common.c:173:39: warning: incorrect type in argument 3 (different base types) drivers/usb/renesas_usbhs/common.c:173:39: expected unsigned short [usertype] data drivers/usb/renesas_usbhs/common.c:173:39: got restricted __le16 [usertype] wValue drivers/usb/renesas_usbhs/common.c:174:39: warning: incorrect type in argument 3 (different base types) drivers/usb/renesas_usbhs/common.c:174:39: expected unsigned short [usertype] data drivers/usb/renesas_usbhs/common.c:174:39: got restricted __le16 [usertype] wIndex drivers/usb/renesas_usbhs/common.c:175:39: warning: incorrect type in argument 3 (different base types) drivers/usb/renesas_usbhs/common.c:175:39: expected unsigned short [usertype] data Note: I believe this to be correct, and it should be a no-op on arm.
Reviewed-by: Geert Uytterhoeven Reviewed-by: Yoshihiro Shimoda Signed-off-by: Ben Dooks Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/common.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 4c3de777ef6c..a3c30b609433 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -162,17 +162,17 @@ void usbhs_usbreq_get_val(struct usbhs_priv *priv, struct usb_ctrlrequest *req) req->bRequest = (val >> 8) & 0xFF; req->bRequestType = (val >> 0) & 0xFF; - req->wValue = usbhs_read(priv, USBVAL); - req->wIndex = usbhs_read(priv, USBINDX); - req->wLength = usbhs_read(priv, USBLENG); + req->wValue = cpu_to_le16(usbhs_read(priv, USBVAL)); + req->wIndex = cpu_to_le16(usbhs_read(priv, USBINDX)); + req->wLength = cpu_to_le16(usbhs_read(priv, USBLENG)); } void usbhs_usbreq_set_val(struct usbhs_priv *priv, struct usb_ctrlrequest *req) { usbhs_write(priv, USBREQ, (req->bRequest << 8) | req->bRequestType); - usbhs_write(priv, USBVAL, req->wValue); - usbhs_write(priv, USBINDX, req->wIndex); - usbhs_write(priv, USBLENG, req->wLength); + usbhs_write(priv, USBVAL, le16_to_cpu(req->wValue)); + usbhs_write(priv, USBINDX, le16_to_cpu(req->wIndex)); + usbhs_write(priv, USBLENG, le16_to_cpu(req->wLength)); usbhs_bset(priv, DCPCTR, SUREQ, SUREQ); } -- cgit v1.2.3 From ef48aacf860bc0bf24a0e9afee1f1a12cebf1155 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 16 Oct 2019 13:38:28 +0900 Subject: usb: gadget: udc: renesas_usb3: Fix __le16 warnings This patch fixes the following sparse warnings by using a macro and a suitable variable type. 
drivers/usb/gadget/udc/renesas_usb3.c:1547:17: warning: restricted __le16 degrades to integer drivers/usb/gadget/udc/renesas_usb3.c:1550:43: warning: incorrect type in argument 2 (different base types) drivers/usb/gadget/udc/renesas_usb3.c:1550:43: expected unsigned short [usertype] addr drivers/usb/gadget/udc/renesas_usb3.c:1550:43: got restricted __le16 [usertype] wValue drivers/usb/gadget/udc/renesas_usb3.c:1607:24: warning: incorrect type in assignment (different base types) drivers/usb/gadget/udc/renesas_usb3.c:1607:24: expected unsigned short [assigned] [usertype] status drivers/usb/gadget/udc/renesas_usb3.c:1607:24: got restricted __le16 [usertype] drivers/usb/gadget/udc/renesas_usb3.c:1775:17: warning: restricted __le16 degrades to integer Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/renesas_usb3.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index e098f16c01cb..33703140233a 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -1544,10 +1544,10 @@ static void usb3_set_device_address(struct renesas_usb3 *usb3, u16 addr) static bool usb3_std_req_set_address(struct renesas_usb3 *usb3, struct usb_ctrlrequest *ctrl) { - if (ctrl->wValue >= 128) + if (le16_to_cpu(ctrl->wValue) >= 128) return true; /* stall */ - usb3_set_device_address(usb3, ctrl->wValue); + usb3_set_device_address(usb3, le16_to_cpu(ctrl->wValue)); usb3_set_p0_con_for_no_data(usb3); return false; @@ -1582,6 +1582,7 @@ static bool usb3_std_req_get_status(struct renesas_usb3 *usb3, struct renesas_usb3_ep *usb3_ep; int num; u16 status = 0; + __le16 tx_data; switch (ctrl->bRequestType & USB_RECIP_MASK) { case USB_RECIP_DEVICE: @@ -1604,10 +1605,10 @@ static bool usb3_std_req_get_status(struct renesas_usb3 *usb3, } if (!stall) { - status = cpu_to_le16(status); + tx_data = cpu_to_le16(status); 
dev_dbg(usb3_to_dev(usb3), "get_status: req = %p\n", usb_req_to_usb3_req(usb3->ep0_req)); - usb3_pipe0_internal_xfer(usb3, &status, sizeof(status), + usb3_pipe0_internal_xfer(usb3, &tx_data, sizeof(tx_data), usb3_pipe0_get_status_completion); } @@ -1772,7 +1773,7 @@ static bool usb3_std_req_set_sel(struct renesas_usb3 *usb3, static bool usb3_std_req_set_configuration(struct renesas_usb3 *usb3, struct usb_ctrlrequest *ctrl) { - if (ctrl->wValue > 0) + if (le16_to_cpu(ctrl->wValue) > 0) usb3_set_bit(usb3, USB_COM_CON_CONF, USB3_USB_COM_CON); else usb3_clear_bit(usb3, USB_COM_CON_CONF, USB3_USB_COM_CON); -- cgit v1.2.3 From 20ee71cc374a4933808f735354cb4fd949d657c7 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 16 Oct 2019 13:14:33 +0900 Subject: usb: renesas_usbhs: Fix warnings in usbhsg_recip_handler_std_set_device() This patch fixes the following sparse warnings by shifting 8-bits after le16_to_cpu(). drivers/usb/renesas_usbhs/mod_gadget.c:268:47: warning: restricted __le16 degrades to integer drivers/usb/renesas_usbhs/mod_gadget.c:268:47: warning: cast to restricted __le16 Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/mod_gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index e5ef56991dba..efc40bc1f68c 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -265,7 +265,7 @@ static int usbhsg_recip_handler_std_set_device(struct usbhs_priv *priv, case USB_DEVICE_TEST_MODE: usbhsg_recip_handler_std_control_done(priv, uep, ctrl); udelay(100); - usbhs_sys_set_test_mode(priv, le16_to_cpu(ctrl->wIndex >> 8)); + usbhs_sys_set_test_mode(priv, le16_to_cpu(ctrl->wIndex) >> 8); break; default: usbhsg_recip_handler_std_control_done(priv, uep, ctrl); -- cgit v1.2.3 From e92f30ac37d12756aeb733538ad85df2be7e139f Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Tue, 
15 Oct 2019 16:30:17 +0100 Subject: usb: renesas_usbhs: fix type of buf Fix the type of buf in __usbhsg_recip_send_status to be __le16 to avoid the following sparse warning: drivers/usb/renesas_usbhs/mod_gadget.c:335:14: warning: incorrect type in assignment (different base types) drivers/usb/renesas_usbhs/mod_gadget.c:335:14: expected unsigned short drivers/usb/renesas_usbhs/mod_gadget.c:335:14: got restricted __le16 [usertype] Reviewed-by: Yoshihiro Shimoda Signed-off-by: Ben Dooks Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/mod_gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index efc40bc1f68c..cd38d74b3223 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -315,7 +315,7 @@ static void __usbhsg_recip_send_status(struct usbhsg_gpriv *gpriv, struct usbhs_pipe *pipe = usbhsg_uep_to_pipe(dcp); struct device *dev = usbhsg_gpriv_to_dev(gpriv); struct usb_request *req; - unsigned short *buf; + __le16 *buf; /* alloc new usb_request for recip */ req = usb_ep_alloc_request(&dcp->ep, GFP_ATOMIC); -- cgit v1.2.3 From ba3a1a915c49cc3023e4ddfc88f21e7514e82aa4 Mon Sep 17 00:00:00 2001 From: Cristian Birsan Date: Fri, 4 Oct 2019 20:10:54 +0300 Subject: usb: gadget: udc: atmel: Fix interrupt storm in FIFO mode. Fix interrupt storm generated by endpoints when working in FIFO mode. The TX_COMPLETE interrupt is used only by control endpoints processing. Do not enable it for other types of endpoints. 
Fixes: 914a3f3b3754 ("USB: add atmel_usba_udc driver") Signed-off-by: Cristian Birsan Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/atmel_usba_udc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index 86ffc8307864..1d0d8952a74b 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -449,9 +449,11 @@ static void submit_request(struct usba_ep *ep, struct usba_request *req) next_fifo_transaction(ep, req); if (req->last_transaction) { usba_ep_writel(ep, CTL_DIS, USBA_TX_PK_RDY); - usba_ep_writel(ep, CTL_ENB, USBA_TX_COMPLETE); + if (ep_is_control(ep)) + usba_ep_writel(ep, CTL_ENB, USBA_TX_COMPLETE); } else { - usba_ep_writel(ep, CTL_DIS, USBA_TX_COMPLETE); + if (ep_is_control(ep)) + usba_ep_writel(ep, CTL_DIS, USBA_TX_COMPLETE); usba_ep_writel(ep, CTL_ENB, USBA_TX_PK_RDY); } } -- cgit v1.2.3 From 1c20c89b0421b52b2417bb0f62a611bc669eda1d Mon Sep 17 00:00:00 2001 From: Chandana Kishori Chiluveru Date: Tue, 1 Oct 2019 13:16:48 +0530 Subject: usb: gadget: composite: Fix possible double free memory bug composite_dev_cleanup call from the failure of configfs_composite_bind frees up the cdev->os_desc_req and cdev->req. If the previous calls of bind and unbind is successful these will carry stale values. Consider the below sequence of function calls: configfs_composite_bind() composite_dev_prepare() - Allocate cdev->req, cdev->req->buf composite_os_desc_req_prepare() - Allocate cdev->os_desc_req, cdev->os_desc_req->buf configfs_composite_unbind() composite_dev_cleanup() - free the cdev->os_desc_req->buf and cdev->req->buf Next composition switch configfs_composite_bind() - If it fails goto err_comp_cleanup will call the composite_dev_cleanup() function composite_dev_cleanup() - calls kfree up with the stale values of cdev->req->buf and cdev->os_desc_req from the previous configfs_composite_bind call. 
The free call on these stale values leads to a double free. Hence, fix this issue by setting the request and buffer pointers to NULL after kfree. Signed-off-by: Chandana Kishori Chiluveru Signed-off-by: Felipe Balbi --- drivers/usb/gadget/composite.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index d516e8d6cd7f..5ec54b69c29c 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2170,14 +2170,18 @@ void composite_dev_cleanup(struct usb_composite_dev *cdev) usb_ep_dequeue(cdev->gadget->ep0, cdev->os_desc_req); kfree(cdev->os_desc_req->buf); + cdev->os_desc_req->buf = NULL; usb_ep_free_request(cdev->gadget->ep0, cdev->os_desc_req); + cdev->os_desc_req = NULL; } if (cdev->req) { if (cdev->setup_pending) usb_ep_dequeue(cdev->gadget->ep0, cdev->req); kfree(cdev->req->buf); + cdev->req->buf = NULL; usb_ep_free_request(cdev->gadget->ep0, cdev->req); + cdev->req = NULL; } cdev->next_string_id = 0; device_remove_file(&cdev->gadget->dev, &dev_attr_suspended); -- cgit v1.2.3 From 9bbfceea12a8f145097a27d7c7267af25893c060 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Sun, 29 Sep 2019 21:41:45 -0500 Subject: usb: dwc3: pci: prevent memory leak in dwc3_pci_probe In dwc3_pci_probe a call to platform_device_alloc allocates a device which is correctly put in case of error except one case: when the call to platform_device_add_properties fails it directly returns instead of going to error handling. This commit replaces return with the goto.
Fixes: 1a7b12f69a94 ("usb: dwc3: pci: Supply device properties via driver data") Signed-off-by: Navid Emamdoost Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 5e8e18222f92..023f0357efd7 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -258,7 +258,7 @@ static int dwc3_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) ret = platform_device_add_properties(dwc->dwc3, p); if (ret < 0) - return ret; + goto err; ret = dwc3_pci_quirks(dwc); if (ret) -- cgit v1.2.3 From 1a1c851bbd706ea9f3a9756c2d3db28523506d3b Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Mon, 26 Aug 2019 15:10:55 -0400 Subject: usb: gadget: configfs: fix concurrent issue between composite APIs We hit several NULL pointer issues if configfs_composite_unbind and composite_setup (or composite_disconnect) are running together. These issues occur when doing the function switch stress test: configfs_composite_unbind is called from user mode by echo "" to the /sys/../UDC entry, and meanwhile, the setup interrupt or disconnect interrupt is raised by hardware. The composite_setup will get the cdev from get_gadget_data, but configfs_composite_unbind will set gadget data to NULL, so the NULL pointer issue occurs. This concurrency issue is hard to reproduce with the native kernel, but can be reproduced with the android kernel. In this commit, we introduce one spinlock belonging to structure gadget_info, since we can't use the same spinlock in usb_composite_dev due to exclusive running together between composite_setup and configfs_composite_unbind. We also add one bit flag 'unbind' to indicate the code is in the unbind routine; this bit is needed because we sometimes release the lock during configfs_composite_unbind, and composite_setup may run at that time.
Several oops: oops 1: android_work: sent uevent USB_STATE=CONNECTED configfs-gadget gadget: super-speed config #1: b android_work: sent uevent USB_STATE=CONFIGURED init: Received control message 'start' for 'adbd' from pid: 3515 (system_server) Unable to handle kernel NULL pointer dereference at virtual address 0000002a init: Received control message 'stop' for 'adbd' from pid: 3375 (/vendor/bin/hw/android.hardware.usb@1.1-servic) Mem abort info: Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgd = ffff8008f1b7f000 [000000000000002a] *pgd=0000000000000000 Internal error: Oops: 96000004 [#1] PREEMPT SMP Modules linked in: CPU: 4 PID: 2457 Comm: irq/125-5b11000 Not tainted 4.14.98-07846-g0b40a9b-dirty #16 Hardware name: Freescale i.MX8QM MEK (DT) task: ffff8008f2a98000 task.stack: ffff00000b7b8000 PC is at composite_setup+0x44/0x1508 LR is at android_setup+0xb8/0x13c pc : [] lr : [] pstate: 800001c5 sp : ffff00000b7bbb80 x29: ffff00000b7bbb80 x28: ffff8008f2a3c010 x27: 0000000000000001 x26: 0000000000000000 [1232/1897] audit: audit_lost=25791 audit_rate_limit=5 audit_backlog_limit=64 x25: 00000000ffffffa1 x24: ffff8008f2a3c010 audit: rate limit exceeded x23: 0000000000000409 x22: ffff000009c8e000 x21: ffff8008f7a8b428 x20: ffff00000afae000 x19: ffff0000089ff000 x18: 0000000000000000 x17: 0000000000000000 x16: ffff0000082b7c9c x15: 0000000000000000 x14: f1866f5b952aca46 x13: e35502e30d44349c x12: 0000000000000008 x11: 0000000000000008 x10: 0000000000000a30 x9 : ffff00000b7bbd00 x8 : ffff8008f2a98a90 x7 : ffff8008f27a9c90 x6 : 0000000000000001 x5 : 0000000000000000 x4 : 0000000000000001 x3 : 0000000000000000 x2 : 0000000000000006 x1 : ffff0000089ff8d0 x0 : 732a010310b9ed00 X7: 0xffff8008f27a9c10: 9c10 00000002 00000000 00000001 00000000 13110000 ffff0000 00000002 00208040 9c30 00000000 00000000 00000000 00000000 00000000 00000005 00000029 
00000000 9c50 00051778 00000001 f27a8e00 ffff8008 00000005 00000000 00000078 00000078 9c70 00000078 00000000 09031d48 ffff0000 00100000 00000000 00400000 00000000 9c90 00000001 00000000 00000000 00000000 00000000 00000000 ffefb1a0 ffff8008 9cb0 f27a9ca8 ffff8008 00000000 00000000 b9d88037 00000173 1618a3eb 00000001 9cd0 870a792a 0000002e 16188fe6 00000001 0000242b 00000000 00000000 00000000 using random self ethernet address 9cf0 019a4646 00000000 000547f3 00000000 ecfd6c33 00000002 00000000 using random host ethernet address 00000000 X8: 0xffff8008f2a98a10: 8a10 00000000 00000000 f7788d00 ffff8008 00000001 00000000 00000000 00000000 8a30 eb218000 ffff8008 f2a98000 ffff8008 f2a98000 ffff8008 09885000 ffff0000 8a50 f34df480 ffff8008 00000000 00000000 f2a98648 ffff8008 09c8e000 ffff0000 8a70 fff2c800 ffff8008 09031d48 ffff0000 0b7bbd00 ffff0000 0b7bbd00 ffff0000 8a90 080861bc ffff0000 00000000 00000000 00000000 00000000 00000000 00000000 8ab0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 8ad0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 8af0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 X21: 0xffff8008f7a8b3a8: b3a8 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 b3c8 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 b3e8 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 b408 00000000 00000000 00000000 00000000 00000000 00000000 00000001 00000000 b428 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 b448 0053004d 00540046 00300031 00010030 eb07b520 ffff8008 20011201 00000003 b468 e418d109 0104404e 00010302 00000000 eb07b558 ffff8008 eb07b558 ffff8008 b488 f7a8b488 ffff8008 f7a8b488 ffff8008 f7a8b300 ffff8008 00000000 00000000 X24: 0xffff8008f2a3bf90: bf90 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bfb0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 
00000000 bfd0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bff0 00000000 00000000 00000000 00000000 f76c8010 ffff8008 f76c8010 ffff8008 c010 00000000 00000000 f2a3c018 ffff8008 f2a3c018 ffff8008 08a067dc ffff0000 c030 f2a5a000 ffff8008 091c3650 ffff0000 f716fd18 ffff8008 f716fe30 ffff8008 c050 f2ce4a30 ffff8008 00000000 00000005 00000000 00000000 095d1568 ffff0000 c070 f76c8010 ffff8008 f2ce4b00 ffff8008 095cac68 ffff0000 f2a5a028 ffff8008 X28: 0xffff8008f2a3bf90: bf90 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bfb0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bfd0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bff0 00000000 00000000 00000000 00000000 f76c8010 ffff8008 f76c8010 ffff8008 c010 00000000 00000000 f2a3c018 ffff8008 f2a3c018 ffff8008 08a067dc ffff0000 c030 f2a5a000 ffff8008 091c3650 ffff0000 f716fd18 ffff8008 f716fe30 ffff8008 c050 f2ce4a30 ffff8008 00000000 00000005 00000000 00000000 095d1568 ffff0000 c070 f76c8010 ffff8008 f2ce4b00 ffff8008 095cac68 ffff0000 f2a5a028 ffff8008 Process irq/125-5b11000 (pid: 2457, stack limit = 0xffff00000b7b8000) Call trace: Exception stack(0xffff00000b7bba40 to 0xffff00000b7bbb80) ba40: 732a010310b9ed00 ffff0000089ff8d0 0000000000000006 0000000000000000 ba60: 0000000000000001 0000000000000000 0000000000000001 ffff8008f27a9c90 ba80: ffff8008f2a98a90 ffff00000b7bbd00 0000000000000a30 0000000000000008 baa0: 0000000000000008 e35502e30d44349c f1866f5b952aca46 0000000000000000 bac0: ffff0000082b7c9c 0000000000000000 0000000000000000 ffff0000089ff000 bae0: ffff00000afae000 ffff8008f7a8b428 ffff000009c8e000 0000000000000409 bb00: ffff8008f2a3c010 00000000ffffffa1 0000000000000000 0000000000000001 bb20: ffff8008f2a3c010 ffff00000b7bbb80 ffff000008a032fc ffff00000b7bbb80 bb40: ffff0000089ffb3c 00000000800001c5 ffff00000b7bbb80 732a010310b9ed00 bb60: ffffffffffffffff ffff0000080f777c ffff00000b7bbb80 ffff0000089ffb3c [] 
composite_setup+0x44/0x1508 [] android_setup+0xb8/0x13c [] cdns3_ep0_delegate_req+0x44/0x70 [] cdns3_check_ep0_interrupt_proceed+0x33c/0x654 [] cdns3_device_thread_irq_handler+0x4b0/0x4bc [] cdns3_thread_irq+0x48/0x68 [] irq_thread_fn+0x28/0x88 [] irq_thread+0x13c/0x228 [] kthread+0x104/0x130 [] ret_from_fork+0x10/0x18 oops2: composite_disconnect: Calling disconnect on a Gadget that is not connected android_work: did not send uevent (0 0 (null)) init: Received control message 'stop' for 'adbd' from pid: 3359 (/vendor/bin/hw/android.hardware.usb@1.1-service.imx) init: Sending signal 9 to service 'adbd' (pid 22343) process group... ------------[ cut here ]------------ audit: audit_lost=180038 audit_rate_limit=5 audit_backlog_limit=64 audit: rate limit exceeded WARNING: CPU: 0 PID: 3468 at kernel_imx/drivers/usb/gadget/composite.c:2009 composite_disconnect+0x80/0x88 Modules linked in: CPU: 0 PID: 3468 Comm: HWC-UEvent-Thre Not tainted 4.14.98-07846-g0b40a9b-dirty #16 Hardware name: Freescale i.MX8QM MEK (DT) task: ffff8008f2349c00 task.stack: ffff00000b0a8000 PC is at composite_disconnect+0x80/0x88 LR is at composite_disconnect+0x80/0x88 pc : [] lr : [] pstate: 600001c5 sp : ffff000008003dd0 x29: ffff000008003dd0 x28: ffff8008f2349c00 x27: ffff000009885018 x26: ffff000008004000 Timeout for IPC response! 
x25: ffff000009885018 x24: ffff000009c8e280 x23: ffff8008f2d98010 x22: 00000000000001c0 x21: ffff8008f2d98394 x20: ffff8008f2d98010 x19: 0000000000000000 x18: 0000e3956f4f075a fxos8700 4-001e: i2c block read acc failed x17: 0000e395735727e8 x16: ffff00000829f4d4 x15: ffffffffffffffff x14: 7463656e6e6f6320 x13: 746f6e2009090920 x12: 7369207461687420 x11: 7465676461472061 x10: 206e6f207463656e x9 : 6e6f637369642067 x8 : ffff000009c8e280 x7 : ffff0000086ca6cc x6 : ffff000009f15e78 x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffffffffffffffff x2 : c3f28b86000c3900 x1 : c3f28b86000c3900 x0 : 000000000000004e X20: 0xffff8008f2d97f90: 7f90 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 7fb0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 libprocessgroup: Failed to kill process cgroup uid 0 pid 22343 in 215ms, 1 processes remain 7fd0 Timeout for IPC response! 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 using random self ethernet address 7ff0 00000000 00000000 00000000 00000000 f76c8010 ffff8008 f76c8010 ffff8008 8010 00000100 00000000 f2d98018 ffff8008 f2d98018 ffff8008 08a067dc using random host ethernet address ffff0000 8030 f206d800 ffff8008 091c3650 ffff0000 f7957b18 ffff8008 f7957730 ffff8008 8050 f716a630 ffff8008 00000000 00000005 00000000 00000000 095d1568 ffff0000 8070 f76c8010 ffff8008 f716a800 ffff8008 095cac68 ffff0000 f206d828 ffff8008 X21: 0xffff8008f2d98314: 8314 ffff8008 00000000 00000000 00000000 00000000 00000000 00000000 00000000 8334 00000000 00000000 00000000 00000000 00000000 08a04cf4 ffff0000 00000000 8354 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 8374 00000000 00000000 00000000 00001001 00000000 00000000 00000000 00000000 8394 e4bbe4bb 0f230000 ffff0000 0afae000 ffff0000 ae001000 00000000 f206d400 Timeout for IPC response! 
83b4 ffff8008 00000000 00000000 f7957b18 ffff8008 f7957718 ffff8008 f7957018 83d4 ffff8008 f7957118 ffff8008 f7957618 ffff8008 f7957818 ffff8008 f7957918 83f4 ffff8008 f7957d18 ffff8008 00000000 00000000 00000000 00000000 00000000 X23: 0xffff8008f2d97f90: 7f90 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 7fb0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 7fd0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 7ff0 00000000 00000000 00000000 00000000 f76c8010 ffff8008 f76c8010 ffff8008 8010 00000100 00000000 f2d98018 ffff8008 f2d98018 ffff8008 08a067dc ffff0000 8030 f206d800 ffff8008 091c3650 ffff0000 f7957b18 ffff8008 f7957730 ffff8008 8050 f716a630 ffff8008 00000000 00000005 00000000 00000000 095d1568 ffff0000 8070 f76c8010 ffff8008 f716a800 ffff8008 095cac68 ffff0000 f206d828 ffff8008 X28: 0xffff8008f2349b80: 9b80 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 9ba0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 9bc0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 9be0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 9c00 00000022 00000000 ffffffff ffffffff 00010001 00000000 00000000 00000000 9c20 0b0a8000 ffff0000 00000002 00404040 00000000 00000000 00000000 00000000 9c40 00000001 00000000 00000001 00000000 001ebd44 00000001 f390b800 ffff8008 9c60 00000000 00000001 00000070 00000070 00000070 00000000 09031d48 ffff0000 Call trace: Exception stack(0xffff000008003c90 to 0xffff000008003dd0) 3c80: 000000000000004e c3f28b86000c3900 3ca0: c3f28b86000c3900 ffffffffffffffff 0000000000000000 0000000000000000 3cc0: ffff000009f15e78 ffff0000086ca6cc ffff000009c8e280 6e6f637369642067 3ce0: 206e6f207463656e 7465676461472061 7369207461687420 746f6e2009090920 3d00: 7463656e6e6f6320 ffffffffffffffff ffff00000829f4d4 0000e395735727e8 3d20: 0000e3956f4f075a 0000000000000000 ffff8008f2d98010 ffff8008f2d98394 3d40: 
00000000000001c0 ffff8008f2d98010 ffff000009c8e280 ffff000009885018 3d60: ffff000008004000 ffff000009885018 ffff8008f2349c00 ffff000008003dd0 3d80: ffff0000089ff9b0 ffff000008003dd0 ffff0000089ff9b0 00000000600001c5 3da0: ffff8008f33f2cd8 0000000000000000 0000ffffffffffff 0000000000000000 init: Received control message 'start' for 'adbd' from pid: 3359 (/vendor/bin/hw/android.hardware.usb@1.1-service.imx) 3dc0: ffff000008003dd0 ffff0000089ff9b0 [] composite_disconnect+0x80/0x88 [] android_disconnect+0x3c/0x68 [] cdns3_device_irq_handler+0xfc/0x2c8 [] cdns3_irq+0x44/0x94 [] __handle_irq_event_percpu+0x60/0x24c [] handle_irq_event+0x58/0xc0 [] handle_fasteoi_irq+0x98/0x180 [] generic_handle_irq+0x24/0x38 [] __handle_domain_irq+0x60/0xac [] gic_handle_irq+0xd4/0x17c Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi --- drivers/usb/gadget/configfs.c | 110 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 105 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 025129942894..33852c2b29d1 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -61,6 +61,8 @@ struct gadget_info { bool use_os_desc; char b_vendor_code; char qw_sign[OS_STRING_QW_SIGN_LEN]; + spinlock_t spinlock; + bool unbind; }; static inline struct gadget_info *to_gadget_info(struct config_item *item) @@ -1244,6 +1246,7 @@ static int configfs_composite_bind(struct usb_gadget *gadget, int ret; /* the gi->lock is hold by the caller */ + gi->unbind = 0; cdev->gadget = gadget; set_gadget_data(gadget, cdev); ret = composite_dev_prepare(composite, cdev); @@ -1376,31 +1379,128 @@ static void configfs_composite_unbind(struct usb_gadget *gadget) { struct usb_composite_dev *cdev; struct gadget_info *gi; + unsigned long flags; /* the gi->lock is hold by the caller */ cdev = get_gadget_data(gadget); gi = container_of(cdev, struct gadget_info, cdev); + spin_lock_irqsave(&gi->spinlock, flags); + gi->unbind = 1; + 
spin_unlock_irqrestore(&gi->spinlock, flags); kfree(otg_desc[0]); otg_desc[0] = NULL; purge_configs_funcs(gi); composite_dev_cleanup(cdev); usb_ep_autoconfig_reset(cdev->gadget); + spin_lock_irqsave(&gi->spinlock, flags); cdev->gadget = NULL; set_gadget_data(gadget, NULL); + spin_unlock_irqrestore(&gi->spinlock, flags); +} + +static int configfs_composite_setup(struct usb_gadget *gadget, + const struct usb_ctrlrequest *ctrl) +{ + struct usb_composite_dev *cdev; + struct gadget_info *gi; + unsigned long flags; + int ret; + + cdev = get_gadget_data(gadget); + if (!cdev) + return 0; + + gi = container_of(cdev, struct gadget_info, cdev); + spin_lock_irqsave(&gi->spinlock, flags); + cdev = get_gadget_data(gadget); + if (!cdev || gi->unbind) { + spin_unlock_irqrestore(&gi->spinlock, flags); + return 0; + } + + ret = composite_setup(gadget, ctrl); + spin_unlock_irqrestore(&gi->spinlock, flags); + return ret; +} + +static void configfs_composite_disconnect(struct usb_gadget *gadget) +{ + struct usb_composite_dev *cdev; + struct gadget_info *gi; + unsigned long flags; + + cdev = get_gadget_data(gadget); + if (!cdev) + return; + + gi = container_of(cdev, struct gadget_info, cdev); + spin_lock_irqsave(&gi->spinlock, flags); + cdev = get_gadget_data(gadget); + if (!cdev || gi->unbind) { + spin_unlock_irqrestore(&gi->spinlock, flags); + return; + } + + composite_disconnect(gadget); + spin_unlock_irqrestore(&gi->spinlock, flags); +} + +static void configfs_composite_suspend(struct usb_gadget *gadget) +{ + struct usb_composite_dev *cdev; + struct gadget_info *gi; + unsigned long flags; + + cdev = get_gadget_data(gadget); + if (!cdev) + return; + + gi = container_of(cdev, struct gadget_info, cdev); + spin_lock_irqsave(&gi->spinlock, flags); + cdev = get_gadget_data(gadget); + if (!cdev || gi->unbind) { + spin_unlock_irqrestore(&gi->spinlock, flags); + return; + } + + composite_suspend(gadget); + spin_unlock_irqrestore(&gi->spinlock, flags); +} + +static void 
configfs_composite_resume(struct usb_gadget *gadget) +{ + struct usb_composite_dev *cdev; + struct gadget_info *gi; + unsigned long flags; + + cdev = get_gadget_data(gadget); + if (!cdev) + return; + + gi = container_of(cdev, struct gadget_info, cdev); + spin_lock_irqsave(&gi->spinlock, flags); + cdev = get_gadget_data(gadget); + if (!cdev || gi->unbind) { + spin_unlock_irqrestore(&gi->spinlock, flags); + return; + } + + composite_resume(gadget); + spin_unlock_irqrestore(&gi->spinlock, flags); } static const struct usb_gadget_driver configfs_driver_template = { .bind = configfs_composite_bind, .unbind = configfs_composite_unbind, - .setup = composite_setup, - .reset = composite_disconnect, - .disconnect = composite_disconnect, + .setup = configfs_composite_setup, + .reset = configfs_composite_disconnect, + .disconnect = configfs_composite_disconnect, - .suspend = composite_suspend, - .resume = composite_resume, + .suspend = configfs_composite_suspend, + .resume = configfs_composite_resume, .max_speed = USB_SPEED_SUPER, .driver = { -- cgit v1.2.3 From a7d9874c6f3fbc8d25cd9ceba35b6822612c4ebf Mon Sep 17 00:00:00 2001 From: Yinbo Zhu Date: Mon, 29 Jul 2019 14:46:07 +0800 Subject: usb: dwc3: remove the call trace of USBx_GFLADJ layerscape board sometimes reported some usb call trace, that is due to kernel sent LPM tokens automatically when it has no pending transfers and think that the link is idle enough to enter L1, which procedure will ask usb register has a recovery, then kernel will compare USBx_GFLADJ and set GFLADJ_30MHZ, GFLADJ_30MHZ_REG until GFLADJ_30MHZ is equal 0x20, if the conditions were met then issue occur, but whatever the conditions whether were met that usb is all need keep GFLADJ_30MHZ of value is 0x20 (xhci spec ask use GFLADJ_30MHZ to adjust any offset from clock source that generates the clock that drives the SOF counter, 0x20 is default value of it). That is normal logic, so need remove the call trace. 
Signed-off-by: Yinbo Zhu Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 999ce5e84d3c..97d6ae3c4df2 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -312,8 +312,7 @@ static void dwc3_frame_length_adjustment(struct dwc3 *dwc) reg = dwc3_readl(dwc->regs, DWC3_GFLADJ); dft = reg & DWC3_GFLADJ_30MHZ_MASK; - if (!dev_WARN_ONCE(dwc->dev, dft == dwc->fladj, - "request value same as default, ignoring\n")) { + if (dft != dwc->fladj) { reg &= ~DWC3_GFLADJ_30MHZ_MASK; reg |= DWC3_GFLADJ_30MHZ_SDBND_SEL | dwc->fladj; dwc3_writel(dwc->regs, DWC3_GFLADJ, reg); -- cgit v1.2.3 From f3fb802efaef3662744a2215a51294d52a7cfc0e Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 23 Oct 2019 12:02:32 +0300 Subject: usb: cdns3: gadget: Don't manage pullups The USB gadget core is supposed to manage pullups of the controller. Don't manage pullups from within the controller driver. Otherwise, function drivers are not able to keep the controller disconnected from the bus till they are ready. (e.g. 
g_webcam) Reviewed-by: Pawel Laszczak Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/cdns3/gadget.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c index 9050b380ab83..d9e7f2d06098 100644 --- a/drivers/usb/cdns3/gadget.c +++ b/drivers/usb/cdns3/gadget.c @@ -2329,8 +2329,6 @@ static void cdns3_gadget_config(struct cdns3_device *priv_dev) writel(USB_CONF_CLK2OFFDS | USB_CONF_L1DS, &regs->usb_conf); cdns3_configure_dmult(priv_dev, NULL); - - cdns3_gadget_pullup(&priv_dev->gadget, 1); } /** @@ -2713,8 +2711,6 @@ static int cdns3_gadget_suspend(struct cdns3 *cdns, bool do_wakeup) /* disable interrupt for device */ writel(0, &priv_dev->regs->usb_ien); - cdns3_gadget_pullup(&priv_dev->gadget, 0); - return 0; } -- cgit v1.2.3 From e6afcf6c598d6f3a0c9c408bfeddb3f5730608b0 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Fri, 25 Oct 2019 17:04:20 +0200 Subject: iio: adc: stm32-adc: fix stopping dma There may be a race when using dmaengine_terminate_all(). The predisable routine may call iio_triggered_buffer_predisable() prior to a pending DMA callback. Adopt dmaengine_terminate_sync() to ensure there's no pending DMA request before calling iio_triggered_buffer_predisable(). 
Fixes: 2763ea0585c9 ("iio: adc: stm32: add optional dma support") Signed-off-by: Fabrice Gasnier Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 663f8a5012d6..73aee5949b6b 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -1399,7 +1399,7 @@ static int stm32_adc_dma_start(struct iio_dev *indio_dev) cookie = dmaengine_submit(desc); ret = dma_submit_error(cookie); if (ret) { - dmaengine_terminate_all(adc->dma_chan); + dmaengine_terminate_sync(adc->dma_chan); return ret; } @@ -1477,7 +1477,7 @@ static void __stm32_adc_buffer_predisable(struct iio_dev *indio_dev) stm32_adc_conv_irq_disable(adc); if (adc->dma_chan) - dmaengine_terminate_all(adc->dma_chan); + dmaengine_terminate_sync(adc->dma_chan); if (stm32_adc_set_trig(indio_dev, NULL)) dev_err(&indio_dev->dev, "Can't clear trigger\n"); -- cgit v1.2.3 From a08d897bc04f23c608dadde5c31ef194911e78bb Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 26 Oct 2019 16:00:44 -0500 Subject: fix memory leak in large read decrypt offload Spotted by Ronnie. 
Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 4c0922596467..cd55af9b7cc5 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -4084,6 +4084,7 @@ free_pages: kfree(dw->ppages); cifs_small_buf_release(dw->buf); + kfree(dw); } @@ -4157,7 +4158,7 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid, dw->server = server; dw->ppages = pages; dw->len = len; - queue_work(cifsiod_wq, &dw->decrypt); + queue_work(decrypt_wq, &dw->decrypt); *num_mids = 0; /* worker thread takes care of finding mid */ return -1; } -- cgit v1.2.3 From cabe5f85e63626c00f3b879a670ec27325056a2d Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Wed, 16 Oct 2019 09:40:05 -0500 Subject: ARM: dts: imx6-logicpd: Re-enable SNVS power key The baseboard of the Logic PD i.MX6 development kit has a power button routed which can both power down and power up the board. It can also wake the board from sleep. This functionality was marked as disabled by default in imx6qdl.dtsi, so it needs to be explicitly enabled for each board. This patch enables the snvs power key again. 
Signed-off-by: Adam Ford Fixes: 770856f0da5d ("ARM: dts: imx6qdl: Enable SNVS power key according to board design") Cc: stable #5.3+ Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi b/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi index 2a6ce87071f9..9e027b9a5f91 100644 --- a/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi +++ b/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi @@ -328,6 +328,10 @@ pinctrl-0 = <&pinctrl_pwm3>; }; +&snvs_pwrkey { + status = "okay"; +}; + &ssi2 { status = "okay"; }; -- cgit v1.2.3 From 7b20238d28da46f394d37d4d51cc420e1ff9414a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 27 Oct 2019 22:10:36 +0300 Subject: io_uring: Fix leaked shadow_req io_queue_link_head() owns shadow_req after taking it as an argument. By not freeing it in case of an error, it can leak the request along with taken ctx->refs. Reviewed-by: Jackie Liu Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index a30c4f622cb3..ba1431046c98 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2413,6 +2413,7 @@ static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req, if (ret) { if (ret != -EIOCBQUEUED) { io_free_req(req); + __io_free_req(shadow); io_cqring_add_event(ctx, s->sqe->user_data, ret); return 0; } -- cgit v1.2.3 From ffaee2728f9b276fc8829abb90f290b5b4b96282 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Thu, 17 Oct 2019 15:00:17 -0700 Subject: riscv: add prototypes for assembly language functions from head.S Add prototypes for assembly language functions defined in head.S, and include these prototypes into C source files that call those functions. This patch resolves the following warnings from sparse: arch/riscv/kernel/setup.c:39:10: warning: symbol 'hart_lottery' was not declared. Should it be static? 
arch/riscv/kernel/setup.c:42:13: warning: symbol 'parse_dtb' was not declared. Should it be static? arch/riscv/kernel/smpboot.c:33:6: warning: symbol '__cpu_up_stack_pointer' was not declared. Should it be static? arch/riscv/kernel/smpboot.c:34:6: warning: symbol '__cpu_up_task_pointer' was not declared. Should it be static? arch/riscv/mm/fault.c:25:17: warning: symbol 'do_page_fault' was not declared. Should it be static? This change should have no functional impact. Signed-off-by: Paul Walmsley --- arch/riscv/kernel/head.h | 21 +++++++++++++++++++++ arch/riscv/kernel/setup.c | 2 ++ arch/riscv/kernel/smpboot.c | 2 ++ arch/riscv/mm/fault.c | 2 ++ arch/riscv/mm/init.c | 2 ++ 5 files changed, 29 insertions(+) create mode 100644 arch/riscv/kernel/head.h diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h new file mode 100644 index 000000000000..105fb0496b24 --- /dev/null +++ b/arch/riscv/kernel/head.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 SiFive, Inc. 
+ */ +#ifndef __ASM_HEAD_H +#define __ASM_HEAD_H + +#include +#include + +extern atomic_t hart_lottery; + +asmlinkage void do_page_fault(struct pt_regs *regs); +asmlinkage void __init setup_vm(uintptr_t dtb_pa); + +extern void *__cpu_up_stack_pointer[]; +extern void *__cpu_up_task_pointer[]; + +void __init parse_dtb(void); + +#endif /* __ASM_HEAD_H */ diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index a990a6cb184f..845ae0e12115 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -24,6 +24,8 @@ #include #include +#include "head.h" + #ifdef CONFIG_DUMMY_CONSOLE struct screen_info screen_info = { .orig_video_lines = 30, diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 18ae6da5115e..59fa59e013d4 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -30,6 +30,8 @@ #include #include +#include "head.h" + void *__cpu_up_stack_pointer[NR_CPUS]; void *__cpu_up_task_pointer[NR_CPUS]; static DECLARE_COMPLETION(cpu_running); diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 96add1427a75..247b8c859c44 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -18,6 +18,8 @@ #include #include +#include "../kernel/head.h" + /* * This routine handles page faults. It determines the address and the * problem, and then passes it off to one of the appropriate routines. 
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index a1ca6200c31f..07af7b1e4069 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -19,6 +19,8 @@ #include #include +#include "../kernel/head.h" + unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); -- cgit v1.2.3 From 6a527b6785ba1d19d6338439352de6c21e8847c3 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Thu, 17 Oct 2019 14:45:58 -0700 Subject: riscv: init: merge split string literals in preprocessor directive sparse complains loudly when string literals associated with preprocessor directives are split into multiple, separately quoted strings across different lines: arch/riscv/mm/init.c:341:9: error: Expected ; at the end of type declaration arch/riscv/mm/init.c:341:9: error: got "not use absolute addressing." arch/riscv/mm/init.c:358:9: error: Trying to use reserved word 'do' as identifier arch/riscv/mm/init.c:358:9: error: Expected ; at end of declaration [ ... ] It turns out this doesn't compile. The existing Linux practice for this situation is simply to use a single long line. So, fix by concatenating the strings. This patch should have no functional impact. This version incorporates changes based on feedback from Luc Van Oostenryck . 
Signed-off-by: Paul Walmsley Reviewed-by: Luc Van Oostenryck Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/linux-riscv/CAAhSdy2nX2LwEEAZuMtW_ByGTkHO6KaUEvVxRnba_ENEjmFayQ@mail.gmail.com/T/#mc1a58bc864f71278123d19a7abc083a9c8e37033 Fixes: 387181dcdb6c1 ("RISC-V: Always compile mm/init.c with cmodel=medany and notrace") Cc: Anup Patel --- arch/riscv/mm/init.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 07af7b1e4069..573463d1c799 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -339,8 +339,7 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) */ #ifndef __riscv_cmodel_medany -#error "setup_vm() is called from head.S before relocate so it should " - "not use absolute addressing." +#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing." #endif asmlinkage void __init setup_vm(uintptr_t dtb_pa) -- cgit v1.2.3 From bf6df5dd25b74400424f3ff5a61edad2fd6904e6 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Thu, 17 Oct 2019 15:08:48 -0700 Subject: riscv: mark some code and data as file-static Several functions and arrays which are only used in the files in which they are declared are missing "static" qualifiers. Warnings for these symbols are reported by sparse: arch/riscv/kernel/vdso.c:28:18: warning: symbol 'vdso_data' was not declared. Should it be static? arch/riscv/mm/sifive_l2_cache.c:145:12: warning: symbol 'sifive_l2_init' was not declared. Should it be static? Resolve these warnings by marking them as static. This version incorporates feedback from Greentime Hu . 
Signed-off-by: Paul Walmsley Reviewed-by: Christoph Hellwig Cc: Greentime Hu --- arch/riscv/kernel/vdso.c | 2 +- arch/riscv/mm/sifive_l2_cache.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index c9c21e0d5641..e24fccab8185 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -25,7 +25,7 @@ static union { struct vdso_data data; u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = &vdso_data_store.data; +static struct vdso_data *vdso_data = &vdso_data_store.data; static int __init vdso_init(void) { diff --git a/arch/riscv/mm/sifive_l2_cache.c b/arch/riscv/mm/sifive_l2_cache.c index 2e637ad71c05..a9ffff3277c7 100644 --- a/arch/riscv/mm/sifive_l2_cache.c +++ b/arch/riscv/mm/sifive_l2_cache.c @@ -142,7 +142,7 @@ static irqreturn_t l2_int_handler(int irq, void *device) return IRQ_HANDLED; } -int __init sifive_l2_init(void) +static int __init sifive_l2_init(void) { struct device_node *np; struct resource res; -- cgit v1.2.3 From 5ed881bc3afc40d7a23c2211ead1aeb4980dda20 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Thu, 17 Oct 2019 15:21:28 -0700 Subject: riscv: add missing header file includes sparse identifies several missing prototypes caused by missing preprocessor include directives: arch/riscv/kernel/cpufeature.c:16:6: warning: symbol 'has_fpu' was not declared. Should it be static? arch/riscv/kernel/process.c:26:6: warning: symbol 'arch_cpu_idle' was not declared. Should it be static? arch/riscv/kernel/reset.c:15:6: warning: symbol 'pm_power_off' was not declared. Should it be static? arch/riscv/kernel/syscall_table.c:15:6: warning: symbol 'sys_call_table' was not declared. Should it be static? arch/riscv/kernel/traps.c:149:13: warning: symbol 'trap_init' was not declared. Should it be static? arch/riscv/kernel/vdso.c:54:5: warning: symbol 'arch_setup_additional_pages' was not declared. Should it be static? 
arch/riscv/kernel/smp.c:64:6: warning: symbol 'arch_match_cpu_phys_id' was not declared. Should it be static? arch/riscv/kernel/module-sections.c:89:5: warning: symbol 'module_frob_arch_sections' was not declared. Should it be static? arch/riscv/mm/context.c:42:6: warning: symbol 'switch_mm' was not declared. Should it be static? Fix by including the appropriate header files in the appropriate source files. This patch should have no functional impact. Signed-off-by: Paul Walmsley Reviewed-by: Christoph Hellwig --- arch/riscv/include/asm/irq.h | 3 +++ arch/riscv/include/asm/switch_to.h | 1 + arch/riscv/kernel/cpufeature.c | 1 + arch/riscv/kernel/module-sections.c | 1 + arch/riscv/kernel/process.c | 2 ++ arch/riscv/kernel/reset.c | 1 + arch/riscv/kernel/smp.c | 2 ++ arch/riscv/kernel/smpboot.c | 1 + arch/riscv/kernel/syscall_table.c | 1 + arch/riscv/kernel/time.c | 1 + arch/riscv/kernel/traps.c | 1 + arch/riscv/kernel/vdso.c | 1 + arch/riscv/mm/context.c | 1 + 13 files changed, 17 insertions(+) diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h index 75576424c0f7..6e1b0e0325eb 100644 --- a/arch/riscv/include/asm/irq.h +++ b/arch/riscv/include/asm/irq.h @@ -7,6 +7,9 @@ #ifndef _ASM_RISCV_IRQ_H #define _ASM_RISCV_IRQ_H +#include +#include + #define NR_IRQS 0 void riscv_timer_interrupt(void); diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index f0227bdce0f0..ee4f0ac62c9d 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -6,6 +6,7 @@ #ifndef _ASM_RISCV_SWITCH_TO_H #define _ASM_RISCV_SWITCH_TO_H +#include #include #include #include diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index b1ade9a49347..a5ad00043104 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -10,6 +10,7 @@ #include #include #include +#include unsigned long elf_hwcap __read_mostly; #ifdef CONFIG_FPU diff --git 
a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c index c9ae48333114..e264e59e596e 100644 --- a/arch/riscv/kernel/module-sections.c +++ b/arch/riscv/kernel/module-sections.c @@ -8,6 +8,7 @@ #include #include #include +#include unsigned long module_emit_got_entry(struct module *mod, unsigned long val) { diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index fb3a082362eb..85e3c39bb60b 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -7,6 +7,7 @@ * Copyright (C) 2017 SiFive */ +#include #include #include #include @@ -19,6 +20,7 @@ #include #include #include +#include extern asmlinkage void ret_from_fork(void); extern asmlinkage void ret_from_kernel_thread(void); diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c index d0fe623bfb8f..aa56bb135ec4 100644 --- a/arch/riscv/kernel/reset.c +++ b/arch/riscv/kernel/reset.c @@ -4,6 +4,7 @@ */ #include +#include #include static void default_power_off(void) diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index b18cd6c8e8fb..5c9ec78422c2 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -8,7 +8,9 @@ * Copyright (C) 2017 SiFive */ +#include #include +#include #include #include #include diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 59fa59e013d4..ec0be2f6a2e8 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "head.h" diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c index e5dd52d8f633..f1ead9df96ca 100644 --- a/arch/riscv/kernel/syscall_table.c +++ b/arch/riscv/kernel/syscall_table.c @@ -8,6 +8,7 @@ #include #include #include +#include #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 9dd1f2e64db1..6a53c02e9c73 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c 
@@ -7,6 +7,7 @@ #include #include #include +#include unsigned long riscv_timebase; EXPORT_SYMBOL_GPL(riscv_timebase); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 10a17e545f43..0b6e271efc43 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -3,6 +3,7 @@ * Copyright (C) 2012 Regents of the University of California */ +#include #include #include #include diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index e24fccab8185..484d95a70907 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -6,6 +6,7 @@ * Copyright (C) 2015 Regents of the University of California */ +#include #include #include #include diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index beeb5d7f92ea..ca66d44156b6 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -7,6 +7,7 @@ #include #include #include +#include /* * When necessary, performs a deferred icache flush for the given MM context, -- cgit v1.2.3 From a48dac448d85712dbc827cdfeb29f720f2c345ff Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Thu, 17 Oct 2019 15:41:25 -0700 Subject: riscv: fp: add missing __user pointer annotations The __user annotations were removed from the {save,restore}_fp_state() function signatures by commit 007f5c358957 ("Refactor FPU code in signal setup/return procedures"), but should be present, and sparse warns when they are not applied. Add them back in. This change should have no functional impact. 
Signed-off-by: Paul Walmsley Fixes: 007f5c358957 ("Refactor FPU code in signal setup/return procedures") Cc: Alan Kao Reviewed-by: Christoph Hellwig --- arch/riscv/kernel/signal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index b14d7647d800..64bc914ce9ff 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -26,7 +26,7 @@ struct rt_sigframe { #ifdef CONFIG_FPU static long restore_fp_state(struct pt_regs *regs, - union __riscv_fp_state *sc_fpregs) + union __riscv_fp_state __user *sc_fpregs) { long err; struct __riscv_d_ext_state __user *state = &sc_fpregs->d; @@ -53,7 +53,7 @@ static long restore_fp_state(struct pt_regs *regs, } static long save_fp_state(struct pt_regs *regs, - union __riscv_fp_state *sc_fpregs) + union __riscv_fp_state __user *sc_fpregs) { long err; struct __riscv_d_ext_state __user *state = &sc_fpregs->d; -- cgit v1.2.3 From f307307992bf63e609fe5395953048e81c9ebc54 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Thu, 17 Oct 2019 22:20:05 -0700 Subject: riscv: for C functions called only from assembly, mark with __visible Rather than adding prototypes for C functions called only by assembly code, mark them as __visible. This avoids adding prototypes that will never be used by the callers. Resolves the following sparse warnings: arch/riscv/kernel/irq.c:27:29: warning: symbol 'do_IRQ' was not declared. Should it be static? arch/riscv/kernel/ptrace.c:151:6: warning: symbol 'do_syscall_trace_enter' was not declared. Should it be static? arch/riscv/kernel/ptrace.c:165:6: warning: symbol 'do_syscall_trace_exit' was not declared. Should it be static? arch/riscv/kernel/signal.c:295:17: warning: symbol 'do_notify_resume' was not declared. Should it be static? arch/riscv/kernel/traps.c:92:1: warning: symbol 'do_trap_unknown' was not declared. Should it be static? 
arch/riscv/kernel/traps.c:94:1: warning: symbol 'do_trap_insn_misaligned' was not declared. Should it be static? arch/riscv/kernel/traps.c:96:1: warning: symbol 'do_trap_insn_fault' was not declared. Should it be static? arch/riscv/kernel/traps.c:98:1: warning: symbol 'do_trap_insn_illegal' was not declared. Should it be static? arch/riscv/kernel/traps.c:100:1: warning: symbol 'do_trap_load_misaligned' was not declared. Should it be static? arch/riscv/kernel/traps.c:102:1: warning: symbol 'do_trap_load_fault' was not declared. Should it be static? arch/riscv/kernel/traps.c:104:1: warning: symbol 'do_trap_store_misaligned' was not declared. Should it be static? arch/riscv/kernel/traps.c:106:1: warning: symbol 'do_trap_store_fault' was not declared. Should it be static? arch/riscv/kernel/traps.c:108:1: warning: symbol 'do_trap_ecall_u' was not declared. Should it be static? arch/riscv/kernel/traps.c:110:1: warning: symbol 'do_trap_ecall_s' was not declared. Should it be static? arch/riscv/kernel/traps.c:112:1: warning: symbol 'do_trap_ecall_m' was not declared. Should it be static? arch/riscv/kernel/traps.c:124:17: warning: symbol 'do_trap_break' was not declared. Should it be static? arch/riscv/kernel/smpboot.c:136:24: warning: symbol 'smp_callin' was not declared. Should it be static? Based on a suggestion from Luc Van Oostenryck. This version includes changes based on feedback from Christoph Hellwig . 
Signed-off-by: Paul Walmsley Cc: Luc Van Oostenryck Reviewed-by: Christoph Hellwig # for do_syscall_trace_* --- arch/riscv/kernel/irq.c | 2 +- arch/riscv/kernel/ptrace.c | 4 ++-- arch/riscv/kernel/signal.c | 4 ++-- arch/riscv/kernel/smpboot.c | 2 +- arch/riscv/kernel/traps.c | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index 6d8659388c49..fffac6ddb0e0 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -24,7 +24,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) return 0; } -asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs) +asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 368751438366..1252113ef8b2 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -148,7 +148,7 @@ long arch_ptrace(struct task_struct *child, long request, * Allows PTRACE_SYSCALL to work. These are called from entry.S in * {handle,ret_from}_syscall. 
*/ -void do_syscall_trace_enter(struct pt_regs *regs) +__visible void do_syscall_trace_enter(struct pt_regs *regs) { if (test_thread_flag(TIF_SYSCALL_TRACE)) if (tracehook_report_syscall_entry(regs)) @@ -162,7 +162,7 @@ void do_syscall_trace_enter(struct pt_regs *regs) audit_syscall_entry(regs->a7, regs->a0, regs->a1, regs->a2, regs->a3); } -void do_syscall_trace_exit(struct pt_regs *regs) +__visible void do_syscall_trace_exit(struct pt_regs *regs) { audit_syscall_exit(regs); diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 64bc914ce9ff..d0f6f212f5df 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -292,8 +292,8 @@ static void do_signal(struct pt_regs *regs) * notification of userspace execution resumption * - triggered by the _TIF_WORK_MASK flags */ -asmlinkage void do_notify_resume(struct pt_regs *regs, - unsigned long thread_info_flags) +asmlinkage __visible void do_notify_resume(struct pt_regs *regs, + unsigned long thread_info_flags) { /* Handle pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index ec0be2f6a2e8..261f4087cc39 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -133,7 +133,7 @@ void __init smp_cpus_done(unsigned int max_cpus) /* * C entry point for a secondary processor. 
*/ -asmlinkage void __init smp_callin(void) +asmlinkage __visible void __init smp_callin(void) { struct mm_struct *mm = &init_mm; diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 0b6e271efc43..473de3ae8bb7 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -84,7 +84,7 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code, } #define DO_ERROR_INFO(name, signo, code, str) \ -asmlinkage void name(struct pt_regs *regs) \ +asmlinkage __visible void name(struct pt_regs *regs) \ { \ do_trap_error(regs, signo, code, regs->sepc, "Oops - " str); \ } @@ -121,7 +121,7 @@ static inline unsigned long get_break_insn_length(unsigned long pc) return (((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) ? 4UL : 2UL); } -asmlinkage void do_trap_break(struct pt_regs *regs) +asmlinkage __visible void do_trap_break(struct pt_regs *regs) { if (user_mode(regs)) force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->sepc); -- cgit v1.2.3 From 40ce7919d8730f5936da2bc8a21b46bd07db6411 Mon Sep 17 00:00:00 2001 From: Marvin Liu Date: Tue, 22 Oct 2019 01:10:04 +0800 Subject: virtio_ring: fix stalls for packed rings When VIRTIO_F_RING_EVENT_IDX is negotiated, virtio devices can use virtqueue_enable_cb_delayed_packed to reduce the number of device interrupts. At the moment, this is the case for virtio-net when the napi_tx module parameter is set to false. In this case, the virtio driver selects an event offset and expects that the device will send a notification when rolling over the event offset in the ring. However, if this roll-over happens before the event suppression structure update, the notification won't be sent. To address this race condition the driver needs to check whether the device rolled over the offset after updating the event suppression structure. With VIRTIO_F_RING_PACKED, the virtio driver did this by reading the flags field of the descriptor at the specified offset.
Unfortunately, checking at the event offset isn't reliable: if descriptors are chained (e.g. when INDIRECT is off) not all descriptors are overwritten by the device, so it's possible that the device skipped the specific descriptor driver is checking when writing out used descriptors. If this happens, the driver won't detect the race condition and will incorrectly expect the device to send a notification. For virtio-net, the result will be a TX queue stall, with the transmission getting blocked forever. With the packed ring, it isn't easy to find a location which is guaranteed to change upon the roll-over, except the next device descriptor, as described in the spec: Writes of device and driver descriptors can generally be reordered, but each side (driver and device) are only required to poll (or test) a single location in memory: the next device descriptor after the one they processed previously, in circular order. while this might be sub-optimal, let's do exactly this for now. Cc: stable@vger.kernel.org Cc: Jason Wang Fixes: f51f982682e2a ("virtio_ring: leverage event idx in packed ring") Signed-off-by: Marvin Liu Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index bdc08244a648..a8041e451e9e 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1499,9 +1499,6 @@ static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) * counter first before updating event flags. 
*/ virtio_wmb(vq->weak_barriers); - } else { - used_idx = vq->last_used_idx; - wrap_counter = vq->packed.used_wrap_counter; } if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { @@ -1518,7 +1515,9 @@ static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) */ virtio_mb(vq->weak_barriers); - if (is_used_desc_packed(vq, used_idx, wrap_counter)) { + if (is_used_desc_packed(vq, + vq->last_used_idx, + vq->packed.used_wrap_counter)) { END_USE(vq); return false; } -- cgit v1.2.3 From 6771596169bf585d8d7218f1dc5eb7c2d2663275 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Oct 2019 17:00:51 +0200 Subject: vsock/virtio: remove unused 'work' field from 'struct virtio_vsock_pkt' The 'work' field was introduced with commit 06a8fc78367d0 ("VSOCK: Introduce virtio_vsock_common.ko") but it is never used in the code, so we can remove it to save memory allocated in the per-packet 'struct virtio_vsock_pkt' Suggested-by: Michael S. Tsirkin Signed-off-by: Stefano Garzarella Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_vsock.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 4c7781f4b29b..07875ccc7bb5 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -48,7 +48,6 @@ struct virtio_vsock_sock { struct virtio_vsock_pkt { struct virtio_vsock_hdr hdr; - struct work_struct work; struct list_head list; /* socket refcnt not held, only use for cancellation */ struct vsock_sock *vsk; -- cgit v1.2.3 From b3683dee840274e9997d958b9d82e5de95950f0b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 24 Oct 2019 11:57:18 +0800 Subject: vringh: fix copy direction of vringh_iov_push_kern() We want to copy from iov to buf, so the direction was wrong. Note: no real user for the helper, but it will be used by future features. Signed-off-by: Jason Wang Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/vringh.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 08ad0d1f0476..a0a2d74967ef 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -852,6 +852,12 @@ static inline int xfer_kern(void *src, void *dst, size_t len) return 0; } +static inline int kern_xfer(void *dst, void *src, size_t len) +{ + memcpy(dst, src, len); + return 0; +} + /** * vringh_init_kern - initialize a vringh for a kernelspace vring. * @vrh: the vringh to initialize. @@ -958,7 +964,7 @@ EXPORT_SYMBOL(vringh_iov_pull_kern); ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov, const void *src, size_t len) { - return vringh_iov_xfer(wiov, (void *)src, len, xfer_kern); + return vringh_iov_xfer(wiov, (void *)src, len, kern_xfer); } EXPORT_SYMBOL(vringh_iov_push_kern); -- cgit v1.2.3 From b234fe9558615098d8d62516e7041ad7f99ebcea Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 22 Oct 2019 22:21:28 +0300 Subject: clk: imx8m: Use SYS_PLL1_800M as intermediate parent of CLK_ARM During cpu frequency switching the main "CLK_ARM" is reparented to an intermediate "step" clock. On imx8mm and imx8mn the 24M oscillator is used for this purpose but it is extremely slow, increasing wakeup latencies to the point that i2c transactions can timeout and system becomes unresponsive. Fix by switching the "step" clk to SYS_PLL1_800M, matching the behavior of imx8m cpufreq drivers in imx vendor tree. This bug was not immediately apparent because upstream arm64 defconfig uses the "performance" governor by default so no cpufreq transitions happen. 
Fixes: ba5625c3e272 ("clk: imx: Add clock driver support for imx8mm") Fixes: 96d6392b54db ("clk: imx: Add support for i.MX8MN clock driver") Cc: stable@vger.kernel.org Signed-off-by: Leonard Crestez Link: https://lkml.kernel.org/r/f5d2b9c53f1ed5ccb1dd3c6624f56759d92e1689.1571771777.git.leonard.crestez@nxp.com Acked-by: Shawn Guo Signed-off-by: Stephen Boyd --- drivers/clk/imx/clk-imx8mm.c | 2 +- drivers/clk/imx/clk-imx8mn.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/imx/clk-imx8mm.c b/drivers/clk/imx/clk-imx8mm.c index 067ab876911d..172589e94f60 100644 --- a/drivers/clk/imx/clk-imx8mm.c +++ b/drivers/clk/imx/clk-imx8mm.c @@ -638,7 +638,7 @@ static int imx8mm_clocks_probe(struct platform_device *pdev) clks[IMX8MM_CLK_A53_DIV], clks[IMX8MM_CLK_A53_SRC], clks[IMX8MM_ARM_PLL_OUT], - clks[IMX8MM_CLK_24M]); + clks[IMX8MM_SYS_PLL1_800M]); imx_check_clocks(clks, ARRAY_SIZE(clks)); diff --git a/drivers/clk/imx/clk-imx8mn.c b/drivers/clk/imx/clk-imx8mn.c index 47a4b44ba3cb..58b5acee3830 100644 --- a/drivers/clk/imx/clk-imx8mn.c +++ b/drivers/clk/imx/clk-imx8mn.c @@ -610,7 +610,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev) clks[IMX8MN_CLK_A53_DIV], clks[IMX8MN_CLK_A53_SRC], clks[IMX8MN_ARM_PLL_OUT], - clks[IMX8MN_CLK_24M]); + clks[IMX8MN_SYS_PLL1_800M]); imx_check_clocks(clks, ARRAY_SIZE(clks)); -- cgit v1.2.3 From 8c7e975667fbc3b7c816119dd56104739899f125 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 25 Oct 2019 15:16:36 +0300 Subject: perf/core: Start rejecting the syscall with attr.__reserved_2 set Commit: 1a5941312414c ("perf: Add wakeup watermark control to the AUX area") added attr.__reserved_2 padding, but forgot to add an ABI check to reject attributes with this field set. Fix that. 
Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: adrian.hunter@intel.com Cc: mathieu.poirier@linaro.org Link: https://lkml.kernel.org/r/20191025121636.75182-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index bb3748d29b04..aec8dba2bea4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10635,7 +10635,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, attr->size = size; - if (attr->__reserved_1) + if (attr->__reserved_1 || attr->__reserved_2) return -EINVAL; if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) -- cgit v1.2.3 From 317b96bb14303c7998dbcd5bc606bd8038fdd4b4 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Wed, 23 Oct 2019 10:09:54 -0500 Subject: perf/x86/amd/ibs: Fix reading of the IBS OpData register and thus precise RIP validity The loop that reads all the IBS MSRs into *buf stopped one MSR short of reading the IbsOpData register, which contains the RipInvalid status bit. Fix the offset_max assignment so the MSR gets read, so the RIP invalid evaluation is based on what the IBS h/w output, instead of what was left in memory. Signed-off-by: Kim Phillips Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. 
Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: d47e8238cd76 ("perf/x86-ibs: Take instruction pointer from ibs sample") Link: https://lkml.kernel.org/r/20191023150955.30292-1-kim.phillips@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/ibs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 5b35b7ea5d72..98ba21a588a1 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -614,7 +614,7 @@ fail: if (event->attr.sample_type & PERF_SAMPLE_RAW) offset_max = perf_ibs->offset_max; else if (check_rip) - offset_max = 2; + offset_max = 3; else offset_max = 1; do { -- cgit v1.2.3 From e431e79b60603079d269e0c2a5177943b95fa4b6 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Wed, 23 Oct 2019 10:09:55 -0500 Subject: perf/x86/amd/ibs: Handle erratum #420 only on the affected CPU family (10h) This saves us writing the IBS control MSR twice when disabling the event. I searched revision guides for all families since 10h, and did not find occurrence of erratum #420, nor anything remotely similar: so we isolate the secondary MSR write to family 10h only. Also unconditionally update the count mask for IBS Op implementations that have read & writeable current count (CurCnt) fields in addition to the MaxCnt field. These bits were reserved on prior implementations, and therefore shouldn't have negative impact. Signed-off-by: Kim Phillips Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. 
Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: c9574fe0bdb9 ("perf/x86-ibs: Implement workaround for IBS erratum #420") Link: https://lkml.kernel.org/r/20191023150955.30292-2-kim.phillips@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/ibs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 98ba21a588a1..26c36357c4c9 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -377,7 +377,8 @@ static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc, u64 config) { config &= ~perf_ibs->cnt_mask; - wrmsrl(hwc->config_base, config); + if (boot_cpu_data.x86 == 0x10) + wrmsrl(hwc->config_base, config); config &= ~perf_ibs->enable_mask; wrmsrl(hwc->config_base, config); } @@ -553,7 +554,8 @@ static struct perf_ibs perf_ibs_op = { }, .msr = MSR_AMD64_IBSOPCTL, .config_mask = IBS_OP_CONFIG_MASK, - .cnt_mask = IBS_OP_MAX_CNT, + .cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT | + IBS_OP_CUR_CNT_RAND, .enable_mask = IBS_OP_ENABLE, .valid_mask = IBS_OP_VAL, .max_period = IBS_OP_MAX_CNT << 4, -- cgit v1.2.3 From 75be6f703a141b048590d659a3954c4fedd30bba Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 25 Oct 2019 07:43:13 -0700 Subject: perf/x86/uncore: Fix event group support The events in the same group don't start or stop simultaneously. Here is the ftrace when enabling event group for uncore_iio_0: # perf stat -e "{uncore_iio_0/event=0x1/,uncore_iio_0/event=0xe/}" -0 [000] d.h. 8959.064832: read_msr: a41, value b2b0b030 //Read counter reg of IIO unit0 counter0 -0 [000] d.h. 8959.064835: write_msr: a48, value 400001 //Write Ctrl reg of IIO unit0 counter0 to enable counter0. <------ Although counter0 is enabled, Unit Ctrl is still freezed. Nothing will count. We are still good here. -0 [000] d.h. 
8959.064836: read_msr: a40, value 30100 //Read Unit Ctrl reg of IIO unit0 -0 [000] d.h. 8959.064838: write_msr: a40, value 30000 //Write Unit Ctrl reg of IIO unit0 to enable all counters in the unit by clear Freeze bit <------Unit0 is un-freezed. Counter0 has been enabled. Now it starts counting. But counter1 has not been enabled yet. The issue starts here. -0 [000] d.h. 8959.064846: read_msr: a42, value 0 //Read counter reg of IIO unit0 counter1 -0 [000] d.h. 8959.064847: write_msr: a49, value 40000e //Write Ctrl reg of IIO unit0 counter1 to enable counter1. <------ Now, counter1 just starts to count. Counter0 has been running for a while. Current code un-freezes the Unit Ctrl right after the first counter is enabled. The subsequent group events always loses some counter values. Implement pmu_enable and pmu_disable support for uncore, which can help to batch hardware accesses. No one uses uncore_enable_box and uncore_disable_box. Remove them. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-drivers-review@eclists.intel.com Cc: linux-perf@eclists.intel.com Fixes: 087bfbb03269 ("perf/x86: Add generic Intel uncore PMU support") Link: https://lkml.kernel.org/r/1572014593-31591-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 44 ++++++++++++++++++++++++++++++++++++------ arch/x86/events/intel/uncore.h | 12 ------------ 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 6fc2e06ab4c6..86467f85c383 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -502,10 +502,8 @@ void uncore_pmu_event_start(struct perf_event *event, int flags) local64_set(&event->hw.prev_count, uncore_read_counter(box, event)); 
uncore_enable_event(box, event); - if (box->n_active == 1) { - uncore_enable_box(box); + if (box->n_active == 1) uncore_pmu_start_hrtimer(box); - } } void uncore_pmu_event_stop(struct perf_event *event, int flags) @@ -529,10 +527,8 @@ void uncore_pmu_event_stop(struct perf_event *event, int flags) WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; - if (box->n_active == 0) { - uncore_disable_box(box); + if (box->n_active == 0) uncore_pmu_cancel_hrtimer(box); - } } if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { @@ -778,6 +774,40 @@ static int uncore_pmu_event_init(struct perf_event *event) return ret; } +static void uncore_pmu_enable(struct pmu *pmu) +{ + struct intel_uncore_pmu *uncore_pmu; + struct intel_uncore_box *box; + + uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); + if (!uncore_pmu) + return; + + box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); + if (!box) + return; + + if (uncore_pmu->type->ops->enable_box) + uncore_pmu->type->ops->enable_box(box); +} + +static void uncore_pmu_disable(struct pmu *pmu) +{ + struct intel_uncore_pmu *uncore_pmu; + struct intel_uncore_box *box; + + uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); + if (!uncore_pmu) + return; + + box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); + if (!box) + return; + + if (uncore_pmu->type->ops->disable_box) + uncore_pmu->type->ops->disable_box(box); +} + static ssize_t uncore_get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf) { @@ -803,6 +833,8 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu) pmu->pmu = (struct pmu) { .attr_groups = pmu->type->attr_groups, .task_ctx_nr = perf_invalid_context, + .pmu_enable = uncore_pmu_enable, + .pmu_disable = uncore_pmu_disable, .event_init = uncore_pmu_event_init, .add = uncore_pmu_event_add, .del = uncore_pmu_event_del, diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 
f36f7bebbc1b..bbfdaa720b45 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -441,18 +441,6 @@ static inline int uncore_freerunning_hw_config(struct intel_uncore_box *box, return -EINVAL; } -static inline void uncore_disable_box(struct intel_uncore_box *box) -{ - if (box->pmu->type->ops->disable_box) - box->pmu->type->ops->disable_box(box); -} - -static inline void uncore_enable_box(struct intel_uncore_box *box) -{ - if (box->pmu->type->ops->enable_box) - box->pmu->type->ops->enable_box(box); -} - static inline void uncore_disable_event(struct intel_uncore_box *box, struct perf_event *event) { -- cgit v1.2.3 From 652521d460cbfa24ef27717b4b28acfac4281be6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 24 Oct 2019 14:29:04 +0200 Subject: perf/headers: Fix spelling s/EACCESS/EACCES/, s/privilidge/privilege/ As per POSIX, the correct spelling of the error code is EACCES: include/uapi/asm-generic/errno-base.h:#define EACCES 13 /* Permission denied */ Signed-off-by: Geert Uytterhoeven Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Kosina Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/20191024122904.12463-1-geert+renesas@glider.be Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 61448c19a132..68ccc5b1913b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -292,7 +292,7 @@ struct pmu { * -EBUSY -- @event is for this PMU but PMU temporarily unavailable * -EINVAL -- @event is for this PMU but @event is not valid * -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported - * -EACCESS -- @event is for this PMU, @event is valid, but no privilidges + * -EACCES -- @event 
is for this PMU, @event is valid, but no privileges * * 0 -- @event is for this PMU and valid * -- cgit v1.2.3 From 1a7f60b9df614bb36d14dc0c0bc898a31b2b506f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 28 Oct 2019 09:10:56 +0100 Subject: Revert "ALSA: hda: Flush interrupts on disabling" This reverts commit caa8422d01e983782548648e125fd617cadcec3f. It turned out that this commit caused a regression at shutdown / reboot, as the synchronize_irq() calls seems blocking the whole shutdown. Also another part of the change about shuffling the call order looks suspicious; the azx_stop_chip() call disables the CORB / RIRB while the others may still need the CORB/RIRB update. Since the original commit itself was a cargo-fix, let's revert the whole patch. Fixes: caa8422d01e9 ("ALSA: hda: Flush interrupts on disabling") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=205333 BugLinK: https://bugs.freedesktop.org/show_bug.cgi?id=111174 Signed-off-by: Takashi Iwai Cc: Chris Wilson Link: https://lore.kernel.org/r/20191028081056.22010-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/hda/hdac_controller.c | 2 -- sound/pci/hda/hda_intel.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index d3999e7b0705..7e7be8e4dcf9 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -447,8 +447,6 @@ static void azx_int_disable(struct hdac_bus *bus) list_for_each_entry(azx_dev, &bus->stream_list, list) snd_hdac_stream_updateb(azx_dev, SD_CTL, SD_INT_MASK, 0); - synchronize_irq(bus->irq); - /* disable SIE for all streams */ snd_hdac_chip_writeb(bus, INTCTL, 0); diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index a815bc811799..cf53fbd872ee 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1348,9 +1348,9 @@ static int azx_free(struct azx *chip) } if (bus->chip_init) { - azx_stop_chip(chip); azx_clear_irq_pending(chip); 
azx_stop_all_streams(chip); + azx_stop_chip(chip); } if (bus->irq >= 0) -- cgit v1.2.3 From f430c7ed8bc22992ed528b518da465b060b9223f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 22 Oct 2019 20:57:06 -0700 Subject: reset: fix reset_control_ops kerneldoc comment Add a missing short description to the reset_control_ops documentation. Signed-off-by: Randy Dunlap [p.zabel@pengutronix.de: rebased and updated commit message] Signed-off-by: Philipp Zabel --- include/linux/reset-controller.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h index 984f625d5593..eaae6b4e9f24 100644 --- a/include/linux/reset-controller.h +++ b/include/linux/reset-controller.h @@ -7,7 +7,7 @@ struct reset_controller_dev; /** - * struct reset_control_ops + * struct reset_control_ops - reset controller driver callbacks * * @reset: for self-deasserting resets, does all necessary * things to reset the device -- cgit v1.2.3 From e346ff93f02b1ba81e976d4e67ec56582dbdf7f1 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Fri, 25 Oct 2019 16:39:23 +0800 Subject: arm64: dts: imx8mm: fix compatible string for sdma SDMA in i.MX8MM should use same configuration as i.MX8MQ So need to change compatible string to be "fsl,imx8mq-sdma". 
Fixes: a05ea40eb384 ("arm64: dts: imx: Add i.mx8mm dtsi support") Signed-off-by: Shengjiu Wang Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index 58b8cd06cae7..23c8fad7932b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -394,7 +394,7 @@ }; sdma2: dma-controller@302c0000 { - compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma"; + compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma"; reg = <0x302c0000 0x10000>; interrupts = ; clocks = <&clk IMX8MM_CLK_SDMA2_ROOT>, @@ -405,7 +405,7 @@ }; sdma3: dma-controller@302b0000 { - compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma"; + compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma"; reg = <0x302b0000 0x10000>; interrupts = ; clocks = <&clk IMX8MM_CLK_SDMA3_ROOT>, @@ -737,7 +737,7 @@ }; sdma1: dma-controller@30bd0000 { - compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma"; + compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma"; reg = <0x30bd0000 0x10000>; interrupts = ; clocks = <&clk IMX8MM_CLK_SDMA1_ROOT>, -- cgit v1.2.3 From 958c6014c64ef51e5f647d2de527c53e773c8a84 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Fri, 25 Oct 2019 16:40:07 +0800 Subject: arm64: dts: imx8mn: fix compatible string for sdma SDMA in i.MX8MN should use same configuration as i.MX8MQ So need to change compatible string to be "fsl,imx8mq-sdma". 
Fixes: 6c3debcbae47 ("arm64: dts: freescale: Add i.MX8MN dtsi support") Signed-off-by: Shengjiu Wang Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index 98496f570720..43c4db312146 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -288,7 +288,7 @@ }; sdma3: dma-controller@302b0000 { - compatible = "fsl,imx8mn-sdma", "fsl,imx7d-sdma"; + compatible = "fsl,imx8mn-sdma", "fsl,imx8mq-sdma"; reg = <0x302b0000 0x10000>; interrupts = ; clocks = <&clk IMX8MN_CLK_SDMA3_ROOT>, @@ -299,7 +299,7 @@ }; sdma2: dma-controller@302c0000 { - compatible = "fsl,imx8mn-sdma", "fsl,imx7d-sdma"; + compatible = "fsl,imx8mn-sdma", "fsl,imx8mq-sdma"; reg = <0x302c0000 0x10000>; interrupts = ; clocks = <&clk IMX8MN_CLK_SDMA2_ROOT>, @@ -612,7 +612,7 @@ }; sdma1: dma-controller@30bd0000 { - compatible = "fsl,imx8mn-sdma", "fsl,imx7d-sdma"; + compatible = "fsl,imx8mn-sdma", "fsl,imx8mq-sdma"; reg = <0x30bd0000 0x10000>; interrupts = ; clocks = <&clk IMX8MN_CLK_SDMA1_ROOT>, -- cgit v1.2.3 From f792bd173a6fd51d1a4dde04263085ce67486aa3 Mon Sep 17 00:00:00 2001 From: Keyon Jie Date: Fri, 25 Oct 2019 17:15:38 -0500 Subject: ASoC: SOF: Intel: hda-stream: fix the CONFIG_ prefix missing We are missing the 'CONFIG_' prefix when using the kernel configure item SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1, here correct them. 
Fixes: 43b2ab9009b13b ('ASoC: SOF: Intel: hda: Disable DMI L1 entry during capture') Signed-off-by: Keyon Jie Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191025221538.6668-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-stream.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/sof/intel/hda-stream.c b/sound/soc/sof/intel/hda-stream.c index 2c7447188402..0c11fceb28a7 100644 --- a/sound/soc/sof/intel/hda-stream.c +++ b/sound/soc/sof/intel/hda-stream.c @@ -190,7 +190,7 @@ hda_dsp_stream_get(struct snd_sof_dev *sdev, int direction) * Workaround to address a known issue with host DMA that results * in xruns during pause/release in capture scenarios. */ - if (!IS_ENABLED(SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1)) + if (!IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1)) if (stream && direction == SNDRV_PCM_STREAM_CAPTURE) snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, HDA_VS_INTEL_EM2, @@ -228,7 +228,7 @@ int hda_dsp_stream_put(struct snd_sof_dev *sdev, int direction, int stream_tag) spin_unlock_irq(&bus->reg_lock); /* Enable DMI L1 entry if there are no capture streams open */ - if (!IS_ENABLED(SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1)) + if (!IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1)) if (!active_capture_stream) snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, HDA_VS_INTEL_EM2, -- cgit v1.2.3 From c0a333d842ef67ac04adc72ff79dc1ccc3dca4ed Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Sun, 27 Oct 2019 14:48:47 -0500 Subject: ASoC: SOF: Fix memory leak in sof_dfsentry_write In the implementation of sof_dfsentry_write() memory allocated for string is leaked in case of an error. Go to error handling path if the d_name.name is not valid. 
Fixes: 091c12e1f50c ("ASoC: SOF: debug: add new debugfs entries for IPC flood test") Signed-off-by: Navid Emamdoost Link: https://lore.kernel.org/r/20191027194856.4056-1-navid.emamdoost@gmail.com Signed-off-by: Mark Brown --- sound/soc/sof/debug.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/sof/debug.c b/sound/soc/sof/debug.c index 54cd431faab7..5529e8eeca46 100644 --- a/sound/soc/sof/debug.c +++ b/sound/soc/sof/debug.c @@ -152,8 +152,10 @@ static ssize_t sof_dfsentry_write(struct file *file, const char __user *buffer, */ dentry = file->f_path.dentry; if (strcmp(dentry->d_name.name, "ipc_flood_count") && - strcmp(dentry->d_name.name, "ipc_flood_duration_ms")) - return -EINVAL; + strcmp(dentry->d_name.name, "ipc_flood_duration_ms")) { + ret = -EINVAL; + goto out; + } if (!strcmp(dentry->d_name.name, "ipc_flood_duration_ms")) flood_duration_test = true; -- cgit v1.2.3 From 45c1380358b12bf2d1db20a5874e9544f56b34ab Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Sun, 27 Oct 2019 16:53:24 -0500 Subject: ASoC: SOF: ipc: Fix memory leak in sof_set_get_large_ctrl_data In the implementation of sof_set_get_large_ctrl_data() there is a memory leak in case an error. Release partdata if sof_get_ctrl_copy_params() fails. 
Fixes: 54d198d5019d ("ASoC: SOF: Propagate sof_get_ctrl_copy_params() error properly") Signed-off-by: Navid Emamdoost Link: https://lore.kernel.org/r/20191027215330.12729-1-navid.emamdoost@gmail.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/ipc.c b/sound/soc/sof/ipc.c index b2f359d2f7e5..086eeeab8679 100644 --- a/sound/soc/sof/ipc.c +++ b/sound/soc/sof/ipc.c @@ -572,8 +572,10 @@ static int sof_set_get_large_ctrl_data(struct snd_sof_dev *sdev, else err = sof_get_ctrl_copy_params(cdata->type, partdata, cdata, sparams); - if (err < 0) + if (err < 0) { + kfree(partdata); return err; + } msg_bytes = sparams->msg_bytes; pl_size = sparams->pl_size; -- cgit v1.2.3 From dd7e8d903e1eef5a9234a2d69663dcbfeab79571 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 28 Oct 2019 13:52:07 +0200 Subject: ASoC: ti: sdma-pcm: Add back the flags parameter for non standard dma names When non standard names are used it is possible that one of the directions are not provided, thus the flags needs to be present to tell the core that we have half duplex setup. 
Fixes: 642aafea8889 ("ASoC: ti: remove compat dma probing") Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20191028115207.5142-1-peter.ujfalusi@ti.com Signed-off-by: Mark Brown --- sound/soc/ti/sdma-pcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/ti/sdma-pcm.c b/sound/soc/ti/sdma-pcm.c index a236350beb10..2b0bc234e1b6 100644 --- a/sound/soc/ti/sdma-pcm.c +++ b/sound/soc/ti/sdma-pcm.c @@ -62,7 +62,7 @@ int sdma_pcm_platform_register(struct device *dev, config->chan_names[0] = txdmachan; config->chan_names[1] = rxdmachan; - return devm_snd_dmaengine_pcm_register(dev, config, 0); + return devm_snd_dmaengine_pcm_register(dev, config, flags); } EXPORT_SYMBOL_GPL(sdma_pcm_platform_register); -- cgit v1.2.3 From 07c1b73e2a027ea9a52677beeb6a943a3e357139 Mon Sep 17 00:00:00 2001 From: Cheng-Yi Chiang Date: Mon, 28 Oct 2019 17:52:29 +0800 Subject: ASoC: rockchip: rockchip_max98090: Enable SHDN to fix headset detection max98090 spec states that chip needs to be in turned-on state to supply mic bias. Enable SHDN dapm widget along with MICBIAS widget to actually turn on mic bias for proper headset button detection. This is similar to cht_ti_jack_event in sound/soc/intel/boards/cht_bsw_max98090_ti.c. Note that due to ts3a227e reports the jack event right away before the notifier is registered, if headset is plugged on boot, headset button will not get detected until headset is unplugged and plugged. This is still an issue to be fixed. 
Signed-off-by: Cheng-Yi Chiang Link: https://lore.kernel.org/r/20191028095229.99438-1-cychiang@chromium.org Signed-off-by: Mark Brown --- sound/soc/rockchip/rockchip_max98090.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/soc/rockchip/rockchip_max98090.c b/sound/soc/rockchip/rockchip_max98090.c index 0097df1fae66..e80b09143b63 100644 --- a/sound/soc/rockchip/rockchip_max98090.c +++ b/sound/soc/rockchip/rockchip_max98090.c @@ -66,10 +66,13 @@ static int rk_jack_event(struct notifier_block *nb, unsigned long event, struct snd_soc_jack *jack = (struct snd_soc_jack *)data; struct snd_soc_dapm_context *dapm = &jack->card->dapm; - if (event & SND_JACK_MICROPHONE) + if (event & SND_JACK_MICROPHONE) { snd_soc_dapm_force_enable_pin(dapm, "MICBIAS"); - else + snd_soc_dapm_force_enable_pin(dapm, "SHDN"); + } else { snd_soc_dapm_disable_pin(dapm, "MICBIAS"); + snd_soc_dapm_disable_pin(dapm, "SHDN"); + } snd_soc_dapm_sync(dapm); -- cgit v1.2.3 From 658fd65cf0b0d511de1718e48d9a28844c385ae0 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 20 Sep 2019 17:39:06 +0200 Subject: clk: at91: avoid sleeping early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is not allowed to sleep too early in the boot process and this may lead to kernel issues if the bootloader didn't prepare the slow clock and main clock. This results in the following error and dump stack on the AriettaG25: bad: scheduling from the idle thread! Ensure it is possible to sleep, else simply have a delay.
Reported-by: Uwe Kleine-König Signed-off-by: Alexandre Belloni Link: https://lkml.kernel.org/r/20190920153906.20887-1-alexandre.belloni@bootlin.com Fixes: 80eded6ce8bb ("clk: at91: add slow clks driver") Tested-by: Uwe Kleine-König Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-main.c | 5 ++++- drivers/clk/at91/sckc.c | 20 ++++++++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/clk/at91/clk-main.c b/drivers/clk/at91/clk-main.c index 87083b3a2769..37c22667e831 100644 --- a/drivers/clk/at91/clk-main.c +++ b/drivers/clk/at91/clk-main.c @@ -297,7 +297,10 @@ static int clk_main_probe_frequency(struct regmap *regmap) regmap_read(regmap, AT91_CKGR_MCFR, &mcfr); if (mcfr & AT91_PMC_MAINRDY) return 0; - usleep_range(MAINF_LOOP_MIN_WAIT, MAINF_LOOP_MAX_WAIT); + if (system_state < SYSTEM_RUNNING) + udelay(MAINF_LOOP_MIN_WAIT); + else + usleep_range(MAINF_LOOP_MIN_WAIT, MAINF_LOOP_MAX_WAIT); } while (time_before(prep_time, timeout)); return -ETIMEDOUT; diff --git a/drivers/clk/at91/sckc.c b/drivers/clk/at91/sckc.c index 9bfe9a28294a..fac0ca56d42d 100644 --- a/drivers/clk/at91/sckc.c +++ b/drivers/clk/at91/sckc.c @@ -76,7 +76,10 @@ static int clk_slow_osc_prepare(struct clk_hw *hw) writel(tmp | osc->bits->cr_osc32en, sckcr); - usleep_range(osc->startup_usec, osc->startup_usec + 1); + if (system_state < SYSTEM_RUNNING) + udelay(osc->startup_usec); + else + usleep_range(osc->startup_usec, osc->startup_usec + 1); return 0; } @@ -187,7 +190,10 @@ static int clk_slow_rc_osc_prepare(struct clk_hw *hw) writel(readl(sckcr) | osc->bits->cr_rcen, sckcr); - usleep_range(osc->startup_usec, osc->startup_usec + 1); + if (system_state < SYSTEM_RUNNING) + udelay(osc->startup_usec); + else + usleep_range(osc->startup_usec, osc->startup_usec + 1); return 0; } @@ -288,7 +294,10 @@ static int clk_sam9x5_slow_set_parent(struct clk_hw *hw, u8 index) writel(tmp, sckcr); - usleep_range(SLOWCK_SW_TIME_USEC, SLOWCK_SW_TIME_USEC + 1); + if (system_state < 
SYSTEM_RUNNING) + udelay(SLOWCK_SW_TIME_USEC); + else + usleep_range(SLOWCK_SW_TIME_USEC, SLOWCK_SW_TIME_USEC + 1); return 0; } @@ -533,7 +542,10 @@ static int clk_sama5d4_slow_osc_prepare(struct clk_hw *hw) return 0; } - usleep_range(osc->startup_usec, osc->startup_usec + 1); + if (system_state < SYSTEM_RUNNING) + udelay(osc->startup_usec); + else + usleep_range(osc->startup_usec, osc->startup_usec + 1); osc->prepared = true; return 0; -- cgit v1.2.3 From 044c1ab399afbe9f2ebef49a3204ef1509826dc7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 28 Oct 2019 09:15:33 -0600 Subject: io_uring: don't touch ctx in setup after ring fd install syzkaller reported an issue where it looks like a malicious app can trigger a use-after-free of reading the ctx ->sq_array and ->rings value right after having installed the ring fd in the process file table. Defer ring fd installation until after we're done reading those values. Fixes: 75b28affdd6a ("io_uring: allocate the two rings together") Reported-by: syzbot+6f03d895a6cd0d06187f@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ba1431046c98..c11c4157a4c2 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3829,10 +3829,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) if (ret) goto err; - ret = io_uring_get_fd(ctx); - if (ret < 0) - goto err; - memset(&p->sq_off, 0, sizeof(p->sq_off)); p->sq_off.head = offsetof(struct io_rings, sq.head); p->sq_off.tail = offsetof(struct io_rings, sq.tail); @@ -3850,6 +3846,14 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) p->cq_off.overflow = offsetof(struct io_rings, cq_overflow); p->cq_off.cqes = offsetof(struct io_rings, cqes); + /* + * Install ring fd as the very last thing, so we don't risk someone + * having closed it before we finish setup + */ + ret = io_uring_get_fd(ctx); + if (ret < 0) + goto 
err; + p->features = IORING_FEAT_SINGLE_MMAP; return ret; err: -- cgit v1.2.3 From d482c7bb0541d19dea8bff437a9f3c5563b5b2d2 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 28 Oct 2019 10:52:35 -0400 Subject: USB: Skip endpoints with 0 maxpacket length Endpoints with a maxpacket length of 0 are probably useless. They can't transfer any data, and it's not at all unlikely that an HCD will crash or hang when trying to handle an URB for such an endpoint. Currently the USB core does not check for endpoints having a maxpacket value of 0. This patch adds a check, printing a warning and skipping over any endpoints it catches. Now, the USB spec does not rule out endpoints having maxpacket = 0. But since they wouldn't have any practical use, there doesn't seem to be any good reason for us to accept them. Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.1910281050420.1485-100000@iolanthe.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 151a74a54386..1ac1095bfeac 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -348,6 +348,11 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, /* Validate the wMaxPacketSize field */ maxp = usb_endpoint_maxp(&endpoint->desc); + if (maxp == 0) { + dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has wMaxPacketSize 0, skipping\n", + cfgno, inum, asnum, d->bEndpointAddress); + goto skip_to_next_endpoint_or_interface_descriptor; + } /* Find the highest legal maxpacket size for this endpoint */ i = 0; /* additional transactions per microframe */ -- cgit v1.2.3 From d98ee2a19c3334e9343df3ce254b496f1fc428eb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 22 Oct 2019 16:32:02 +0200 Subject: USB: ldusb: fix ring-buffer locking The custom ring-buffer implementation was merged without any locking or explicit memory 
barriers, but a spinlock was later added by commit 9d33efd9a791 ("USB: ldusb bugfix"). The lock did not cover the update of the tail index once the entry had been processed, something which could lead to memory corruption on weakly ordered architectures or due to compiler optimisations. Specifically, a completion handler running on another CPU might observe the incremented tail index and update the entry before ld_usb_read() is done with it. Fixes: 2824bd250f0b ("[PATCH] USB: add ldusb driver") Fixes: 9d33efd9a791 ("USB: ldusb bugfix") Cc: stable # 2.6.13 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191022143203.5260-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/ldusb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index 15b5f06fb0b3..c3e764909fd0 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -495,11 +495,11 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count, retval = -EFAULT; goto unlock_exit; } - dev->ring_tail = (dev->ring_tail+1) % ring_buffer_size; - retval = bytes_to_read; spin_lock_irq(&dev->rbsl); + dev->ring_tail = (dev->ring_tail + 1) % ring_buffer_size; + if (dev->buffer_overflow) { dev->buffer_overflow = 0; spin_unlock_irq(&dev->rbsl); -- cgit v1.2.3 From 88f6bf3846ee90bf33aa1ce848cd3bfb3229f4a4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 22 Oct 2019 16:32:03 +0200 Subject: USB: ldusb: use unsigned size format specifiers A recent info-leak bug manifested itself along with warning about a negative buffer overflow: ldusb 1-1:0.28: Read buffer overflow, -131383859965943 bytes dropped when it was really a rather large positive one. A sanity check that prevents this has now been put in place, but let's fix up the size format specifiers, which should all be unsigned. 
Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191022143203.5260-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/ldusb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index c3e764909fd0..dd1ea25e42b1 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -487,7 +487,7 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count, } bytes_to_read = min(count, *actual_buffer); if (bytes_to_read < *actual_buffer) - dev_warn(&dev->intf->dev, "Read buffer overflow, %zd bytes dropped\n", + dev_warn(&dev->intf->dev, "Read buffer overflow, %zu bytes dropped\n", *actual_buffer-bytes_to_read); /* copy one interrupt_in_buffer from ring_buffer into userspace */ @@ -562,8 +562,9 @@ static ssize_t ld_usb_write(struct file *file, const char __user *buffer, /* write the data into interrupt_out_buffer from userspace */ bytes_to_write = min(count, write_buffer_size*dev->interrupt_out_endpoint_size); if (bytes_to_write < count) - dev_warn(&dev->intf->dev, "Write buffer overflow, %zd bytes dropped\n", count-bytes_to_write); - dev_dbg(&dev->intf->dev, "%s: count = %zd, bytes_to_write = %zd\n", + dev_warn(&dev->intf->dev, "Write buffer overflow, %zu bytes dropped\n", + count - bytes_to_write); + dev_dbg(&dev->intf->dev, "%s: count = %zu, bytes_to_write = %zu\n", __func__, count, bytes_to_write); if (copy_from_user(dev->interrupt_out_buffer, buffer, bytes_to_write)) { -- cgit v1.2.3 From 52403cfbc635d28195167618690595013776ebde Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 22 Oct 2019 17:31:27 +0200 Subject: USB: ldusb: fix control-message timeout USB control-message timeouts are specified in milliseconds, not jiffies. Waiting 83 minutes for a transfer to complete is a bit excessive. 
Fixes: 2824bd250f0b ("[PATCH] USB: add ldusb driver") Cc: stable # 2.6.13 Reported-by: syzbot+a4fbb3bb76cda0ea4e58@syzkaller.appspotmail.com Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191022153127.22295-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/ldusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index dd1ea25e42b1..8f86b4ebca89 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -581,7 +581,7 @@ static ssize_t ld_usb_write(struct file *file, const char __user *buffer, 1 << 8, 0, dev->interrupt_out_buffer, bytes_to_write, - USB_CTRL_SET_TIMEOUT * HZ); + USB_CTRL_SET_TIMEOUT); if (retval < 0) dev_err(&dev->intf->dev, "Couldn't submit HID_REQ_SET_REPORT %d\n", -- cgit v1.2.3 From 18b74067ac78a2dea65783314c13df98a53d071c Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 25 Oct 2019 17:30:27 +0300 Subject: xhci: Fix use-after-free regression in xhci clear hub TT implementation commit ef513be0a905 ("usb: xhci: Add Clear_TT_Buffer") schedules work to clear TT buffer, but causes a use-after-free regression at the same time. Make sure hub_tt_work finishes before endpoint is disabled, otherwise the work will dereference already freed endpoint and device related pointers. This was triggered when usb core failed to read the configuration descriptor of a FS/LS device during enumeration. xhci driver queued clear_tt_work while usb core freed and reallocated a new device for the next enumeration attempt. EHCI driver implements ehci_endpoint_disable() that makes sure clear_tt_work has finished before it returns, but xhci lacks this support. usb core will call hcd->driver->endpoint_disable() callback before disabling endpoints, so we want this in xhci as well.
The added xhci_endpoint_disable() is based on ehci_endpoint_disable() Fixes: ef513be0a905 ("usb: xhci: Add Clear_TT_Buffer") Cc: # v5.3 Reported-by: Johan Hovold Suggested-by: Johan Hovold Reviewed-by: Johan Hovold Tested-by: Johan Hovold Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/1572013829-14044-2-git-send-email-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 54 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 517ec3206f6e..6c17e3fe181a 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3071,6 +3071,48 @@ void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci, unsigned int ep_index, } } +static void xhci_endpoint_disable(struct usb_hcd *hcd, + struct usb_host_endpoint *host_ep) +{ + struct xhci_hcd *xhci; + struct xhci_virt_device *vdev; + struct xhci_virt_ep *ep; + struct usb_device *udev; + unsigned long flags; + unsigned int ep_index; + + xhci = hcd_to_xhci(hcd); +rescan: + spin_lock_irqsave(&xhci->lock, flags); + + udev = (struct usb_device *)host_ep->hcpriv; + if (!udev || !udev->slot_id) + goto done; + + vdev = xhci->devs[udev->slot_id]; + if (!vdev) + goto done; + + ep_index = xhci_get_endpoint_index(&host_ep->desc); + ep = &vdev->eps[ep_index]; + if (!ep) + goto done; + + /* wait for hub_tt_work to finish clearing hub TT */ + if (ep->ep_state & EP_CLEARING_TT) { + spin_unlock_irqrestore(&xhci->lock, flags); + schedule_timeout_uninterruptible(1); + goto rescan; + } + + if (ep->ep_state) + xhci_dbg(xhci, "endpoint disable with ep_state 0x%x\n", + ep->ep_state); +done: + host_ep->hcpriv = NULL; + spin_unlock_irqrestore(&xhci->lock, flags); +} + /* * Called after usb core issues a clear halt control message. 
* The host side of the halt should already be cleared by a reset endpoint @@ -5238,20 +5280,13 @@ static void xhci_clear_tt_buffer_complete(struct usb_hcd *hcd, unsigned int ep_index; unsigned long flags; - /* - * udev might be NULL if tt buffer is cleared during a failed device - * enumeration due to a halted control endpoint. Usb core might - * have allocated a new udev for the next enumeration attempt. - */ - xhci = hcd_to_xhci(hcd); + + spin_lock_irqsave(&xhci->lock, flags); udev = (struct usb_device *)ep->hcpriv; - if (!udev) - return; slot_id = udev->slot_id; ep_index = xhci_get_endpoint_index(&ep->desc); - spin_lock_irqsave(&xhci->lock, flags); xhci->devs[slot_id]->eps[ep_index].ep_state &= ~EP_CLEARING_TT; xhci_ring_doorbell_for_active_rings(xhci, slot_id, ep_index); spin_unlock_irqrestore(&xhci->lock, flags); @@ -5288,6 +5323,7 @@ static const struct hc_driver xhci_hc_driver = { .free_streams = xhci_free_streams, .add_endpoint = xhci_add_endpoint, .drop_endpoint = xhci_drop_endpoint, + .endpoint_disable = xhci_endpoint_disable, .endpoint_reset = xhci_endpoint_reset, .check_bandwidth = xhci_check_bandwidth, .reset_bandwidth = xhci_reset_bandwidth, -- cgit v1.2.3 From bfa3dbb343f664573292afb9e44f9abeb81a19de Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 25 Oct 2019 17:30:28 +0300 Subject: usb: xhci: fix Immediate Data Transfer endianness The arguments to queue_trb are always byteswapped to LE for placement in the ring, but this should not happen in the case of immediate data; the bytes copied out of transfer_buffer are already in the correct order. Add a complementary byteswap so the bytes end up in the ring correctly. This was observed on BE ppc64 with a "Texas Instruments TUSB73x0 SuperSpeed USB 3.0 xHCI Host Controller [104c:8241]" as a ch341 usb-serial adapter ("1a86:7523 QinHeng Electronics HL-340 USB-Serial adapter") always transmitting the same character (generally NUL) over the serial link regardless of the key pressed. 
Cc: # 5.2+ Fixes: 33e39350ebd2 ("usb: xhci: add Immediate Data Transfer support") Signed-off-by: Samuel Holland Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/1572013829-14044-3-git-send-email-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 85ceb43e3405..e7aab31fd9a5 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3330,6 +3330,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, if (xhci_urb_suitable_for_idt(urb)) { memcpy(&send_addr, urb->transfer_buffer, trb_buff_len); + le64_to_cpus(&send_addr); field |= TRB_IDT; } } @@ -3475,6 +3476,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, if (xhci_urb_suitable_for_idt(urb)) { memcpy(&addr, urb->transfer_buffer, urb->transfer_buffer_length); + le64_to_cpus(&addr); field |= TRB_IDT; } else { addr = (u64) urb->transfer_dma; -- cgit v1.2.3 From d5501d5c29a2e684640507cfee428178d6fd82ca Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Fri, 25 Oct 2019 17:30:29 +0300 Subject: usb: xhci: fix __le32/__le64 accessors in debugfs code It looks like some of the xhci debug code is passing u32 to functions directly from __le32/__le64 fields. Fix this by using le{32,64}_to_cpu() on these to fix the following sparse warnings; xhci-debugfs.c:205:62: warning: incorrect type in argument 1 (different base types) xhci-debugfs.c:205:62: expected unsigned int [usertype] field0 xhci-debugfs.c:205:62: got restricted __le32 xhci-debugfs.c:206:62: warning: incorrect type in argument 2 (different base types) xhci-debugfs.c:206:62: expected unsigned int [usertype] field1 xhci-debugfs.c:206:62: got restricted __le32 ... 
[Trim down commit message, sparse warnings were similar -Mathias] Cc: # 4.15+ Signed-off-by: Ben Dooks Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/1572013829-14044-4-git-send-email-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-debugfs.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c index 7ba6afc7ef23..76c3f29562d2 100644 --- a/drivers/usb/host/xhci-debugfs.c +++ b/drivers/usb/host/xhci-debugfs.c @@ -202,10 +202,10 @@ static void xhci_ring_dump_segment(struct seq_file *s, trb = &seg->trbs[i]; dma = seg->dma + i * sizeof(*trb); seq_printf(s, "%pad: %s\n", &dma, - xhci_decode_trb(trb->generic.field[0], - trb->generic.field[1], - trb->generic.field[2], - trb->generic.field[3])); + xhci_decode_trb(le32_to_cpu(trb->generic.field[0]), + le32_to_cpu(trb->generic.field[1]), + le32_to_cpu(trb->generic.field[2]), + le32_to_cpu(trb->generic.field[3]))); } } @@ -263,10 +263,10 @@ static int xhci_slot_context_show(struct seq_file *s, void *unused) xhci = hcd_to_xhci(bus_to_hcd(dev->udev->bus)); slot_ctx = xhci_get_slot_ctx(xhci, dev->out_ctx); seq_printf(s, "%pad: %s\n", &dev->out_ctx->dma, - xhci_decode_slot_context(slot_ctx->dev_info, - slot_ctx->dev_info2, - slot_ctx->tt_info, - slot_ctx->dev_state)); + xhci_decode_slot_context(le32_to_cpu(slot_ctx->dev_info), + le32_to_cpu(slot_ctx->dev_info2), + le32_to_cpu(slot_ctx->tt_info), + le32_to_cpu(slot_ctx->dev_state))); return 0; } @@ -286,10 +286,10 @@ static int xhci_endpoint_context_show(struct seq_file *s, void *unused) ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, dci); dma = dev->out_ctx->dma + dci * CTX_SIZE(xhci->hcc_params); seq_printf(s, "%pad: %s\n", &dma, - xhci_decode_ep_context(ep_ctx->ep_info, - ep_ctx->ep_info2, - ep_ctx->deq, - ep_ctx->tx_info)); + xhci_decode_ep_context(le32_to_cpu(ep_ctx->ep_info), + le32_to_cpu(ep_ctx->ep_info2), + 
le64_to_cpu(ep_ctx->deq), + le32_to_cpu(ep_ctx->tx_info))); } return 0; -- cgit v1.2.3 From 28df0642abbf6d66908a2858922a7e4b21cdd8c2 Mon Sep 17 00:00:00 2001 From: GwanYeong Kim Date: Fri, 18 Oct 2019 03:22:23 +0000 Subject: usbip: tools: Fix read_usb_vudc_device() error path handling This isn't really accurate right. fread() doesn't always return 0 in error. It could return < number of elements and set errno. Signed-off-by: GwanYeong Kim Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20191018032223.4644-1-gy741.kim@gmail.com Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/libsrc/usbip_device_driver.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/usb/usbip/libsrc/usbip_device_driver.c b/tools/usb/usbip/libsrc/usbip_device_driver.c index 051d7d3f443b..927a151fa9aa 100644 --- a/tools/usb/usbip/libsrc/usbip_device_driver.c +++ b/tools/usb/usbip/libsrc/usbip_device_driver.c @@ -69,7 +69,7 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev) FILE *fd = NULL; struct udev_device *plat; const char *speed; - int ret = 0; + size_t ret; plat = udev_device_get_parent(sdev); path = udev_device_get_syspath(plat); @@ -79,8 +79,10 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev) if (!fd) return -1; ret = fread((char *) &descr, sizeof(descr), 1, fd); - if (ret < 0) + if (ret != 1) { + err("Cannot read vudc device descr file: %s", strerror(errno)); goto err; + } fclose(fd); copy_descr_attr(dev, &descr, bDeviceClass); -- cgit v1.2.3 From d4d8257754c3300ea2a465dadf8d2b02c713c920 Mon Sep 17 00:00:00 2001 From: Suwan Kim Date: Tue, 22 Oct 2019 18:30:17 +0900 Subject: usbip: Fix free of unallocated memory in vhci tx iso_buffer should be set to NULL after use and free in the while loop. In the case of isochronous URB in the while loop, iso_buffer is allocated and after sending it to server, buffer is deallocated. 
And then, if the next URB in the while loop is not a isochronous pipe, iso_buffer still holds the previously deallocated buffer address and kfree tries to free wrong buffer address. Fixes: ea44d190764b ("usbip: Implement SG support to vhci-hcd and stub driver") Reported-by: kbuild test robot Reported-by: Julia Lawall Signed-off-by: Suwan Kim Reviewed-by: Julia Lawall Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20191022093017.8027-1-suwan.kim027@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_tx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c index c3803785f6ef..0ae40a13a9fe 100644 --- a/drivers/usb/usbip/vhci_tx.c +++ b/drivers/usb/usbip/vhci_tx.c @@ -147,7 +147,10 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev) } kfree(iov); + /* This is only for isochronous case */ kfree(iso_buffer); + iso_buffer = NULL; + usbip_dbg_vhci_tx("send txdata\n"); total_size += txsize; -- cgit v1.2.3 From 9a976949613132977098fc49510b46fa8678d864 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 21 Oct 2019 11:48:06 -0400 Subject: usb-storage: Revert commit 747668dbc061 ("usb-storage: Set virt_boundary_mask to avoid SG overflows") Commit 747668dbc061 ("usb-storage: Set virt_boundary_mask to avoid SG overflows") attempted to solve a problem involving scatter-gather I/O and USB/IP by setting the virt_boundary_mask for mass-storage devices. However, it now turns out that this interacts badly with commit 09324d32d2a0 ("block: force an unlimited segment size on queues with a virt boundary"), which was added later. A typical error message is: ehci-pci 0000:00:13.2: swiotlb buffer is full (sz: 327680 bytes), total 32768 (slots), used 97 (slots) There is no longer any reason to keep the virt_boundary_mask setting for usb-storage. 
It was needed in the first place only for handling devices with a block size smaller than the maxpacket size and where the host controller was not capable of fully general scatter-gather operation (that is, able to merge two SG segments into a single USB packet). But: High-speed or slower connections never use a bulk maxpacket value larger than 512; The SCSI layer does not handle block devices with a block size smaller than 512 bytes; All the host controllers capable of SuperSpeed operation can handle fully general SG; Since commit ea44d190764b ("usbip: Implement SG support to vhci-hcd and stub driver") was merged, the USB/IP driver can also handle SG. Therefore all supported device/controller combinations should be okay with no need for any special virt_boundary_mask. So in order to fix the swiotlb problem, this patch reverts commit 747668dbc061. Reported-and-tested-by: Piergiorgio Sartor Link: https://marc.info/?l=linux-usb&m=157134199501202&w=2 Signed-off-by: Alan Stern CC: Seth Bollinger CC: Fixes: 747668dbc061 ("usb-storage: Set virt_boundary_mask to avoid SG overflows") Acked-by: Christoph Hellwig Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.1910211145520.1673-100000@iolanthe.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/scsiglue.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 6737fab94959..54a3c8195c96 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -68,7 +68,6 @@ static const char* host_info(struct Scsi_Host *host) static int slave_alloc (struct scsi_device *sdev) { struct us_data *us = host_to_us(sdev->host); - int maxp; /* * Set the INQUIRY transfer length to 36. 
We don't use any of @@ -77,15 +76,6 @@ static int slave_alloc (struct scsi_device *sdev) */ sdev->inquiry_len = 36; - /* - * USB has unusual scatter-gather requirements: the length of each - * scatterlist element except the last must be divisible by the - * Bulk maxpacket value. Fortunately this value is always a - * power of 2. Inform the block layer about this requirement. - */ - maxp = usb_maxpacket(us->pusb_dev, us->recv_bulk_pipe, 0); - blk_queue_virt_boundary(sdev->request_queue, maxp - 1); - /* * Some host controllers may have alignment requirements. * We'll play it safe by requiring 512-byte alignment always. -- cgit v1.2.3 From 1186f86a71130a7635a20843e355bb880c7349b2 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 23 Oct 2019 11:34:33 -0400 Subject: UAS: Revert commit 3ae62a42090f ("UAS: fix alignment of scatter/gather segments") Commit 3ae62a42090f ("UAS: fix alignment of scatter/gather segments"), copying a similar commit for usb-storage, attempted to solve a problem involving scatter-gather I/O and USB/IP by setting the virt_boundary_mask for mass-storage devices. However, it now turns out that the analogous change in usb-storage interacted badly with commit 09324d32d2a0 ("block: force an unlimited segment size on queues with a virt boundary"), which was added later. A typical error message is: ehci-pci 0000:00:13.2: swiotlb buffer is full (sz: 327680 bytes), total 32768 (slots), used 97 (slots) There is no longer any reason to keep the virt_boundary_mask setting in the uas driver. It was needed in the first place only for handling devices with a block size smaller than the maxpacket size and where the host controller was not capable of fully general scatter-gather operation (that is, able to merge two SG segments into a single USB packet). 
But: High-speed or slower connections never use a bulk maxpacket value larger than 512; The SCSI layer does not handle block devices with a block size smaller than 512 bytes; All the host controllers capable of SuperSpeed operation can handle fully general SG; Since commit ea44d190764b ("usbip: Implement SG support to vhci-hcd and stub driver") was merged, the USB/IP driver can also handle SG. Therefore all supported device/controller combinations should be okay with no need for any special virt_boundary_mask. So in order to head off potential problems similar to those affecting usb-storage, this patch reverts commit 3ae62a42090f. Signed-off-by: Alan Stern CC: Oliver Neukum CC: Acked-by: Christoph Hellwig Fixes: 3ae62a42090f ("UAS: fix alignment of scatter/gather segments") Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.1910231132470.1878-100000@iolanthe.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index bf80d6f81f58..34538253f12c 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -789,29 +789,9 @@ static int uas_slave_alloc(struct scsi_device *sdev) { struct uas_dev_info *devinfo = (struct uas_dev_info *)sdev->host->hostdata; - int maxp; sdev->hostdata = devinfo; - /* - * We have two requirements here. We must satisfy the requirements - * of the physical HC and the demands of the protocol, as we - * definitely want no additional memory allocation in this path - * ruling out using bounce buffers. - * - * For a transmission on USB to continue we must never send - * a package that is smaller than maxpacket. Hence the length of each - * scatterlist element except the last must be divisible by the - * Bulk maxpacket value. - * If the HC does not ensure that through SG, - * the upper layer must do that. 
We must assume nothing - * about the capabilities off the HC, so we use the most - * pessimistic requirement. - */ - - maxp = usb_maxpacket(devinfo->udev, devinfo->data_in_pipe, 0); - blk_queue_virt_boundary(sdev->request_queue, maxp - 1); - /* * The protocol has no requirements on alignment in the strict sense. * Controllers may or may not have alignment restrictions. -- cgit v1.2.3 From 1524b12a6e02a85264af4ed208b034a2239ef374 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 24 Oct 2019 23:49:13 +0000 Subject: RDMA/mlx5: Use irq xarray locking for mkey_table The mkey_table xarray is touched by the reg_mr_callback() function which is called from a hard irq. Thus all other uses of xa_lock must use the _irq variants. WARNING: inconsistent lock state 5.4.0-rc1 #12 Not tainted -------------------------------- inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. python3/343 [HC0[0]:SC0[0]:HE1:SE1] takes: ffff888182be1d40 (&(&xa->xa_lock)->rlock#3){?.-.}, at: xa_erase+0x12/0x30 {IN-HARDIRQ-W} state was registered at: lock_acquire+0xe1/0x200 _raw_spin_lock_irqsave+0x35/0x50 reg_mr_callback+0x2dd/0x450 [mlx5_ib] mlx5_cmd_exec_cb_handler+0x2c/0x70 [mlx5_core] mlx5_cmd_comp_handler+0x355/0x840 [mlx5_core] [..] Possible unsafe locking scenario: CPU0 ---- lock(&(&xa->xa_lock)->rlock#3); lock(&(&xa->xa_lock)->rlock#3); *** DEADLOCK *** 2 locks held by python3/343: #0: ffff88818eb4bd38 (&uverbs_dev->disassociate_srcu){....}, at: ib_uverbs_ioctl+0xe5/0x1e0 [ib_uverbs] #1: ffff888176c76d38 (&file->hw_destroy_rwsem){++++}, at: uobj_destroy+0x2d/0x90 [ib_uverbs] stack backtrace: CPU: 3 PID: 343 Comm: python3 Not tainted 5.4.0-rc1 #12 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x86/0xca print_usage_bug.cold.50+0x2e5/0x355 mark_lock+0x871/0xb50 ? match_held_lock+0x20/0x250 ? check_usage_forwards+0x240/0x240 __lock_acquire+0x7de/0x23a0 ? __kasan_check_read+0x11/0x20 ? 
mark_lock+0xae/0xb50 ? mark_held_locks+0xb0/0xb0 ? find_held_lock+0xca/0xf0 lock_acquire+0xe1/0x200 ? xa_erase+0x12/0x30 _raw_spin_lock+0x2a/0x40 ? xa_erase+0x12/0x30 xa_erase+0x12/0x30 mlx5_ib_dealloc_mw+0x55/0xa0 [mlx5_ib] uverbs_dealloc_mw+0x3c/0x70 [ib_uverbs] uverbs_free_mw+0x1a/0x20 [ib_uverbs] destroy_hw_idr_uobject+0x49/0xa0 [ib_uverbs] [..] Fixes: 0417791536ae ("RDMA/mlx5: Add missing synchronize_srcu() for MW cases") Link: https://lore.kernel.org/r/20191024234910.GA9038@ziepe.ca Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 630599311586..7019c12005f4 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1967,8 +1967,8 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw) int err; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - xa_erase(&dev->mdev->priv.mkey_table, - mlx5_base_mkey(mmw->mmkey.key)); + xa_erase_irq(&dev->mdev->priv.mkey_table, + mlx5_base_mkey(mmw->mmkey.key)); /* * pagefault_single_data_segment() may be accessing mmw under * SRCU if the user bound an ODP MR to this MW. -- cgit v1.2.3 From d4934f45693651ea15357dd6c7c36be28b6da884 Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Fri, 25 Oct 2019 18:04:40 +0530 Subject: RDMA/iw_cxgb4: Avoid freeing skb twice in arp failure case _put_ep_safe() and _put_pass_ep_safe() free the skb before it is freed by process_work(). fix double free by freeing the skb only in process_work(). 
Fixes: 1dad0ebeea1c ("iw_cxgb4: Avoid touch after free error in ARP failure handlers") Link: https://lore.kernel.org/r/1572006880-5800-1-git-send-email-bharat@chelsio.com Signed-off-by: Dakshaja Uppalapati Signed-off-by: Potnuri Bharat Teja Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 9e8eca7b613c..347dc242fb88 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -495,7 +495,6 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); release_ep_resources(ep); - kfree_skb(skb); return 0; } @@ -506,7 +505,6 @@ static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); c4iw_put_ep(&ep->parent_ep->com); release_ep_resources(ep); - kfree_skb(skb); return 0; } -- cgit v1.2.3 From 00a5bf3a8ca30d19f24219fc3cfb74f4eab3600d Mon Sep 17 00:00:00 2001 From: Yash Shah Date: Fri, 25 Oct 2019 08:30:03 +0000 Subject: RISC-V: Add PCIe I/O BAR memory mapping For legacy I/O BARs (non-MMIO BARs) to work correctly on RISC-V Linux, we need to establish a reserved memory region for them, so that drivers that wish to use the legacy I/O BARs can issue reads and writes against a memory region that is mapped to the host PCIe controller's I/O BAR mapping. 
Signed-off-by: Yash Shah Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/io.h | 7 +++++++ arch/riscv/include/asm/pgtable.h | 7 ++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index fc1189ad3777..3ba4d93721d3 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -13,6 +13,7 @@ #include #include +#include extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); @@ -161,6 +162,12 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) #define writeq(v,c) ({ __io_bw(); writeq_cpu((v),(c)); __io_aw(); }) #endif +/* + * I/O port access constants. + */ +#define IO_SPACE_LIMIT (PCI_IO_SIZE - 1) +#define PCI_IOBASE ((void __iomem *)PCI_IO_START) + /* * Emulation routines for the port-mapped IO space used by some PCI drivers. * These are defined as being "fully synchronous", but also "not guaranteed to diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 0352f20c29f4..d3221017194d 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -7,6 +7,7 @@ #define _ASM_RISCV_PGTABLE_H #include +#include #include @@ -86,6 +87,7 @@ extern pgd_t swapper_pg_dir[]; #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) #define VMALLOC_END (PAGE_OFFSET - 1) #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) +#define PCI_IO_SIZE SZ_16M /* * Roughly size the vmemmap space to be large enough to fit enough @@ -100,7 +102,10 @@ extern pgd_t swapper_pg_dir[]; #define vmemmap ((struct page *)VMEMMAP_START) -#define FIXADDR_TOP (VMEMMAP_START) +#define PCI_IO_END VMEMMAP_START +#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) +#define FIXADDR_TOP PCI_IO_START + #ifdef CONFIG_64BIT #define FIXADDR_SIZE PMD_SIZE #else -- cgit v1.2.3 From b681a0529968d2261aa15d7a1e78801b2c06bb07 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Sat, 26 Oct 2019 14:56:35 +0800 Subject: RDMA/hns: Prevent memory leaks of eq->buf_list 
eq->buf_list->buf and eq->buf_list should also be freed when eqe_hop_num is set to 0, or there will be memory leaks. Fixes: a5073d6054f7 ("RDMA/hns: Add eq support of hip08") Link: https://lore.kernel.org/r/1572072995-11277-3-git-send-email-liweihang@hisilicon.com Signed-off-by: Lijun Ou Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7a89d669f8bf..e82567fcdeb7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5389,9 +5389,9 @@ static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev, return; } - if (eq->buf_list) - dma_free_coherent(hr_dev->dev, buf_chk_sz, - eq->buf_list->buf, eq->buf_list->map); + dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf, + eq->buf_list->map); + kfree(eq->buf_list); } static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, -- cgit v1.2.3 From d7d16a89350ab263484c0aa2b523dd3a234e4a80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Oct 2019 22:44:48 -0700 Subject: net: add skb_queue_empty_lockless() Some paths call skb_queue_empty() without holding the queue lock. We must use a barrier in order to not let the compiler do strange things, and avoid KCSAN splats. Adding a barrier in skb_queue_empty() might be overkill; I prefer adding a new helper to clearly identify points where the callers might be lockless. This might help us find real bugs. The corresponding WRITE_ONCE() should add zero cost for current compilers. Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/linux/skbuff.h | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a391147c03d4..64a395c7f689 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1495,6 +1495,19 @@ static inline int skb_queue_empty(const struct sk_buff_head *list) return list->next == (const struct sk_buff *) list; } +/** + * skb_queue_empty_lockless - check if a queue is empty + * @list: queue head + * + * Returns true if the queue is empty, false otherwise. + * This variant can be used in lockless contexts. + */ +static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list) +{ + return READ_ONCE(list->next) == (const struct sk_buff *) list; +} + + /** * skb_queue_is_last - check if skb is the last entry in the queue * @list: queue head @@ -1848,9 +1861,11 @@ static inline void __skb_insert(struct sk_buff *newsk, struct sk_buff *prev, struct sk_buff *next, struct sk_buff_head *list) { - newsk->next = next; - newsk->prev = prev; - next->prev = prev->next = newsk; + /* see skb_queue_empty_lockless() for the opposite READ_ONCE() */ + WRITE_ONCE(newsk->next, next); + WRITE_ONCE(newsk->prev, prev); + WRITE_ONCE(next->prev, newsk); + WRITE_ONCE(prev->next, newsk); list->qlen++; } @@ -1861,11 +1876,11 @@ static inline void __skb_queue_splice(const struct sk_buff_head *list, struct sk_buff *first = list->next; struct sk_buff *last = list->prev; - first->prev = prev; - prev->next = first; + WRITE_ONCE(first->prev, prev); + WRITE_ONCE(prev->next, first); - last->next = next; - next->prev = last; + WRITE_ONCE(last->next, next); + WRITE_ONCE(next->prev, last); } /** @@ -2006,8 +2021,8 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) next = skb->next; prev = skb->prev; skb->next = skb->prev = NULL; - next->prev = prev; - prev->next = next; + WRITE_ONCE(next->prev, prev); + WRITE_ONCE(prev->next, next); } /** -- cgit v1.2.3 
From 137a0dbe3426fd7bcfe3f8117b36a87b3590e4eb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Oct 2019 22:44:49 -0700 Subject: udp: use skb_queue_empty_lockless() syzbot reported a data-race [1]. We should use skb_queue_empty_lockless() to document that we are not ensuring a mutual exclusion and silence KCSAN. [1] BUG: KCSAN: data-race in __skb_recv_udp / __udp_enqueue_schedule_skb write to 0xffff888122474b50 of 8 bytes by interrupt on cpu 0: __skb_insert include/linux/skbuff.h:1852 [inline] __skb_queue_before include/linux/skbuff.h:1958 [inline] __skb_queue_tail include/linux/skbuff.h:1991 [inline] __udp_enqueue_schedule_skb+0x2c1/0x410 net/ipv4/udp.c:1470 __udp_queue_rcv_skb net/ipv4/udp.c:1940 [inline] udp_queue_rcv_one_skb+0x7bd/0xc70 net/ipv4/udp.c:2057 udp_queue_rcv_skb+0xb5/0x400 net/ipv4/udp.c:2074 udp_unicast_rcv_skb.isra.0+0x7e/0x1c0 net/ipv4/udp.c:2233 __udp4_lib_rcv+0xa44/0x17c0 net/ipv4/udp.c:2300 udp_rcv+0x2b/0x40 net/ipv4/udp.c:2470 ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 process_backlog+0x1d3/0x420 net/core/dev.c:5955 read to 0xffff888122474b50 of 8 bytes by task 8921 on cpu 1: skb_queue_empty include/linux/skbuff.h:1494 [inline] __skb_recv_udp+0x18d/0x500 net/ipv4/udp.c:1653 udp_recvmsg+0xe1/0xb10 net/ipv4/udp.c:1712 inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838 sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871 ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480 do_recvmmsg+0x19a/0x5c0 
net/socket.c:2601 __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680 __do_sys_recvmmsg net/socket.c:2703 [inline] __se_sys_recvmmsg net/socket.c:2696 [inline] __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 8921 Comm: syz-executor.4 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 14bc654b6842..2cc259736c2e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1577,7 +1577,7 @@ static int first_packet_length(struct sock *sk) spin_lock_bh(&rcvq->lock); skb = __first_packet_length(sk, rcvq, &total); - if (!skb && !skb_queue_empty(sk_queue)) { + if (!skb && !skb_queue_empty_lockless(sk_queue)) { spin_lock(&sk_queue->lock); skb_queue_splice_tail_init(sk_queue, rcvq); spin_unlock(&sk_queue->lock); @@ -1650,7 +1650,7 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, return skb; } - if (skb_queue_empty(sk_queue)) { + if (skb_queue_empty_lockless(sk_queue)) { spin_unlock_bh(&queue->lock); goto busy_check; } @@ -1676,7 +1676,7 @@ busy_check: break; sk_busy_loop(sk, flags & MSG_DONTWAIT); - } while (!skb_queue_empty(sk_queue)); + } while (!skb_queue_empty_lockless(sk_queue)); /* sk_queue is empty, reader_queue may contain peeked packets */ } while (timeo && -- cgit v1.2.3 From 3ef7cf57c72f32f61e97f8fa401bc39ea1f1a5d4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Oct 2019 22:44:50 -0700 Subject: net: use skb_queue_empty_lockless() in poll() handlers Many poll() handlers are lockless. Using skb_queue_empty_lockless() instead of skb_queue_empty() is more appropriate. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/isdn/capi/capi.c | 2 +- net/atm/common.c | 2 +- net/bluetooth/af_bluetooth.c | 4 ++-- net/caif/caif_socket.c | 2 +- net/core/datagram.c | 4 ++-- net/decnet/af_decnet.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/udp.c | 2 +- net/nfc/llcp_sock.c | 4 ++-- net/phonet/socket.c | 4 ++-- net/sctp/socket.c | 4 ++-- net/tipc/socket.c | 4 ++-- net/unix/af_unix.c | 6 +++--- net/vmw_vsock/af_vsock.c | 2 +- 14 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c index c92b405b7646..ba8619524231 100644 --- a/drivers/isdn/capi/capi.c +++ b/drivers/isdn/capi/capi.c @@ -744,7 +744,7 @@ capi_poll(struct file *file, poll_table *wait) poll_wait(file, &(cdev->recvwait), wait); mask = EPOLLOUT | EPOLLWRNORM; - if (!skb_queue_empty(&cdev->recvqueue)) + if (!skb_queue_empty_lockless(&cdev->recvqueue)) mask |= EPOLLIN | EPOLLRDNORM; return mask; } diff --git a/net/atm/common.c b/net/atm/common.c index b7528e77997c..0ce530af534d 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -668,7 +668,7 @@ __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait) mask |= EPOLLHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* writable? */ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 94ddf19998c7..5f508c50649d 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -460,7 +460,7 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock, if (sk->sk_state == BT_LISTEN) return bt_accept_poll(sk); - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
EPOLLPRI : 0); @@ -470,7 +470,7 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock, if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; if (sk->sk_state == BT_CLOSED) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 13ea920600ae..ef14da50a981 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -953,7 +953,7 @@ static __poll_t caif_poll(struct file *file, mask |= EPOLLRDHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue) || + if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || (sk->sk_shutdown & RCV_SHUTDOWN)) mask |= EPOLLIN | EPOLLRDNORM; diff --git a/net/core/datagram.c b/net/core/datagram.c index c210fc116103..5b685e110aff 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -767,7 +767,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, mask = 0; /* exceptional events? */ - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); @@ -777,7 +777,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, mask |= EPOLLHUP; /* readable? 
*/ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 0ea75286abf4..3349ea81f901 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1205,7 +1205,7 @@ static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wai struct dn_scp *scp = DN_SK(sk); __poll_t mask = datagram_poll(file, sock, wait); - if (!skb_queue_empty(&scp->other_receive_queue)) + if (!skb_queue_empty_lockless(&scp->other_receive_queue)) mask |= EPOLLRDBAND; return mask; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 42187a3b82f4..ffef502f5292 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -584,7 +584,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } /* This barrier is coupled with smp_wmb() in tcp_reset() */ smp_rmb(); - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR; return mask; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2cc259736c2e..345a3d43f5a6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2712,7 +2712,7 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) __poll_t mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; - if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) + if (!skb_queue_empty_lockless(&udp_sk(sk)->reader_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* Check for false positives due to checksum errors */ diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index ccdd790e163a..28604414dec1 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -554,11 +554,11 @@ static __poll_t llcp_sock_poll(struct file *file, struct socket *sock, if (sk->sk_state == LLCP_LISTEN) return llcp_accept_poll(sk); - if (sk->sk_err || 
!skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; if (sk->sk_state == LLCP_CLOSED) diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 96ea9f254ae9..76d499f6af9a 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -338,9 +338,9 @@ static __poll_t pn_socket_poll(struct file *file, struct socket *sock, if (sk->sk_state == TCP_CLOSE) return EPOLLERR; - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; - if (!skb_queue_empty(&pn->ctrlreq_queue)) + if (!skb_queue_empty_lockless(&pn->ctrlreq_queue)) mask |= EPOLLPRI; if (!mask && sk->sk_state == TCP_CLOSE_WAIT) return EPOLLHUP; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5ca0ec0e823c..cfb25391b8b0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -8476,7 +8476,7 @@ __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait) mask = 0; /* Is there any exceptional events? */ - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -8485,7 +8485,7 @@ __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait) mask |= EPOLLHUP; /* Is it readable? Reconsider this code with TCP-style support. */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* The association is either gone or not ready. 
*/ diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f8bbc4aab213..4b92b196cfa6 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -740,7 +740,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock, /* fall through */ case TIPC_LISTEN: case TIPC_CONNECTING: - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) revents |= EPOLLIN | EPOLLRDNORM; break; case TIPC_OPEN: @@ -748,7 +748,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock, revents |= EPOLLOUT; if (!tipc_sk_type_connectionless(sk)) break; - if (skb_queue_empty(&sk->sk_receive_queue)) + if (skb_queue_empty_lockless(&sk->sk_receive_queue)) break; revents |= EPOLLIN | EPOLLRDNORM; break; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 67e87db5877f..0d8da809bea2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2599,7 +2599,7 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ @@ -2628,7 +2628,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, mask = 0; /* exceptional events? */ - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); @@ -2638,7 +2638,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, mask |= EPOLLHUP; /* readable? 
*/ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 2ab43b2bba31..582a3e4dfce2 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -870,7 +870,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, * the queue and write as long as the socket isn't shutdown for * sending. */ - if (!skb_queue_empty(&sk->sk_receive_queue) || + if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || (sk->sk_shutdown & RCV_SHUTDOWN)) { mask |= EPOLLIN | EPOLLRDNORM; } -- cgit v1.2.3 From 3f926af3f4d688e2e11e7f8ed04e277a14d4d4a4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Oct 2019 22:44:51 -0700 Subject: net: use skb_queue_empty_lockless() in busy poll contexts Busy polling usually runs without locks. Let's use skb_queue_empty_lockless() instead of skb_queue_empty(). Also use READ_ONCE() in __skb_try_recv_datagram() to address a similar potential problem. Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- drivers/crypto/chelsio/chtls/chtls_io.c | 2 +- drivers/nvme/host/tcp.c | 2 +- net/core/datagram.c | 2 +- net/core/sock.c | 2 +- net/ipv4/tcp.c | 2 +- net/sctp/socket.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c index 0891ab829b1b..98bc5a4cd5e7 100644 --- a/drivers/crypto/chelsio/chtls/chtls_io.c +++ b/drivers/crypto/chelsio/chtls/chtls_io.c @@ -1702,7 +1702,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return peekmsg(sk, msg, len, nonblock, flags); if (sk_can_busy_loop(sk) && - skb_queue_empty(&sk->sk_receive_queue) && + skb_queue_empty_lockless(&sk->sk_receive_queue) && sk->sk_state == TCP_ESTABLISHED) sk_busy_loop(sk, nonblock); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 770dbcbc999e..7544be84ab35 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2219,7 +2219,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx) struct nvme_tcp_queue *queue = hctx->driver_data; struct sock *sk = queue->sock->sk; - if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue)) + if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) sk_busy_loop(sk, true); nvme_tcp_try_recv(queue); return queue->nr_cqe; diff --git a/net/core/datagram.c b/net/core/datagram.c index 5b685e110aff..03515e46a49a 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -278,7 +278,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, break; sk_busy_loop(sk, flags & MSG_DONTWAIT); - } while (sk->sk_receive_queue.prev != *last); + } while (READ_ONCE(sk->sk_receive_queue.prev) != *last); error = -EAGAIN; diff --git a/net/core/sock.c b/net/core/sock.c index a515392ba84b..b8e758bcb6ad 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3600,7 +3600,7 @@ bool sk_busy_loop_end(void *p, unsigned long start_time) { struct sock *sk = p; - return 
!skb_queue_empty(&sk->sk_receive_queue) || + return !skb_queue_empty_lockless(&sk->sk_receive_queue) || sk_busy_loop_timeout(sk, start_time); } EXPORT_SYMBOL(sk_busy_loop_end); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ffef502f5292..d8876f0e9672 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1964,7 +1964,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); - if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) && + if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) && (sk->sk_state == TCP_ESTABLISHED)) sk_busy_loop(sk, nonblock); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index cfb25391b8b0..ca81e06df165 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -8871,7 +8871,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, if (sk_can_busy_loop(sk)) { sk_busy_loop(sk, noblock); - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) continue; } -- cgit v1.2.3 From 7c422d0ce97552dde4a97e6290de70ec6efb0fc6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Oct 2019 22:44:52 -0700 Subject: net: add READ_ONCE() annotation in __skb_wait_for_more_packets() __skb_wait_for_more_packets() can be called while other cpus can feed packets to the socket receive queue. 
KCSAN reported : BUG: KCSAN: data-race in __skb_wait_for_more_packets / __udp_enqueue_schedule_skb write to 0xffff888102e40b58 of 8 bytes by interrupt on cpu 0: __skb_insert include/linux/skbuff.h:1852 [inline] __skb_queue_before include/linux/skbuff.h:1958 [inline] __skb_queue_tail include/linux/skbuff.h:1991 [inline] __udp_enqueue_schedule_skb+0x2d7/0x410 net/ipv4/udp.c:1470 __udp_queue_rcv_skb net/ipv4/udp.c:1940 [inline] udp_queue_rcv_one_skb+0x7bd/0xc70 net/ipv4/udp.c:2057 udp_queue_rcv_skb+0xb5/0x400 net/ipv4/udp.c:2074 udp_unicast_rcv_skb.isra.0+0x7e/0x1c0 net/ipv4/udp.c:2233 __udp4_lib_rcv+0xa44/0x17c0 net/ipv4/udp.c:2300 udp_rcv+0x2b/0x40 net/ipv4/udp.c:2470 ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 process_backlog+0x1d3/0x420 net/core/dev.c:5955 read to 0xffff888102e40b58 of 8 bytes by task 13035 on cpu 1: __skb_wait_for_more_packets+0xfa/0x320 net/core/datagram.c:100 __skb_recv_udp+0x374/0x500 net/ipv4/udp.c:1683 udp_recvmsg+0xe1/0xb10 net/ipv4/udp.c:1712 inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838 sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871 ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480 do_recvmmsg+0x19a/0x5c0 net/socket.c:2601 __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680 __do_sys_recvmmsg net/socket.c:2703 [inline] __se_sys_recvmmsg net/socket.c:2696 [inline] __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 
Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 13035 Comm: syz-executor.3 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/core/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index 03515e46a49a..da3c24ed129c 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -97,7 +97,7 @@ int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, if (error) goto out_err; - if (sk->sk_receive_queue.prev != skb) + if (READ_ONCE(sk->sk_receive_queue.prev) != skb) goto out; /* Socket shut down? */ -- cgit v1.2.3 From 7de4344f2abbaeda5c62211a8314dbd5da2bac6c Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Thu, 24 Oct 2019 20:42:00 +0530 Subject: net: dpaa2: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in header files related to DPAA2 Ethernet driver supporting Freescale SoCs with DPAA2. For C header files Documentation/process/license-rules.rst mandates C-like comments (opposed to C source files where C++ style should be used) Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46. Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h | 2 +- drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h | 2 +- drivers/net/ethernet/freescale/dpaa2/dprtc.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h index ff2e177395d4..df2458a5e9ef 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright 2018 NXP */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h index 720cd50f5895..4ac05bfef338 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright 2013-2016 Freescale Semiconductor Inc. * Copyright 2016-2018 NXP diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc.h b/drivers/net/ethernet/freescale/dpaa2/dprtc.h index be7914c1634d..311c184e1aef 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dprtc.h +++ b/drivers/net/ethernet/freescale/dpaa2/dprtc.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright 2013-2016 Freescale Semiconductor Inc. * Copyright 2016-2018 NXP -- cgit v1.2.3 From a793183caa9afae907a0d7ddd2ffd57329369bf5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 24 Oct 2019 11:43:31 -0700 Subject: udp: fix data-race in udp_set_dev_scratch() KCSAN reported a data-race in udp_set_dev_scratch() [1] The issue here is that we must not write over skb fields if skb is shared. 
A similar issue has been fixed in commit 89c22d8c3b27 ("net: Fix skb csum races when peeking") While we are at it, use a helper only dealing with udp_skb_scratch(skb)->csum_unnecessary, as this allows udp_set_dev_scratch() to be called once and thus inlined. [1] BUG: KCSAN: data-race in udp_set_dev_scratch / udpv6_recvmsg write to 0xffff888120278317 of 1 bytes by task 10411 on cpu 1: udp_set_dev_scratch+0xea/0x200 net/ipv4/udp.c:1308 __first_packet_length+0x147/0x420 net/ipv4/udp.c:1556 first_packet_length+0x68/0x2a0 net/ipv4/udp.c:1579 udp_poll+0xea/0x110 net/ipv4/udp.c:2720 sock_poll+0xed/0x250 net/socket.c:1256 vfs_poll include/linux/poll.h:90 [inline] do_select+0x7d0/0x1020 fs/select.c:534 core_sys_select+0x381/0x550 fs/select.c:677 do_pselect.constprop.0+0x11d/0x160 fs/select.c:759 __do_sys_pselect6 fs/select.c:784 [inline] __se_sys_pselect6 fs/select.c:769 [inline] __x64_sys_pselect6+0x12e/0x170 fs/select.c:769 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 read to 0xffff888120278317 of 1 bytes by task 10413 on cpu 0: udp_skb_csum_unnecessary include/net/udp.h:358 [inline] udpv6_recvmsg+0x43e/0xe90 net/ipv6/udp.c:310 inet6_recvmsg+0xbb/0x240 net/ipv6/af_inet6.c:592 sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871 ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480 do_recvmmsg+0x19a/0x5c0 net/socket.c:2601 __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680 __do_sys_recvmmsg net/socket.c:2703 [inline] __se_sys_recvmmsg net/socket.c:2696 [inline] __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 10413 Comm: syz-executor.0 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 2276f58ac589 ("udp: use a separate rx queue for packet reception") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Paolo Abeni Reviewed-by: 
Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/udp.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 345a3d43f5a6..d1ed160af202 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1316,6 +1316,20 @@ static void udp_set_dev_scratch(struct sk_buff *skb) scratch->_tsize_state |= UDP_SKB_IS_STATELESS; } +static void udp_skb_csum_unnecessary_set(struct sk_buff *skb) +{ + /* We come here after udp_lib_checksum_complete() returned 0. + * This means that __skb_checksum_complete() might have + * set skb->csum_valid to 1. + * On 64bit platforms, we can set csum_unnecessary + * to true, but only if the skb is not shared. + */ +#if BITS_PER_LONG == 64 + if (!skb_shared(skb)) + udp_skb_scratch(skb)->csum_unnecessary = true; +#endif +} + static int udp_skb_truesize(struct sk_buff *skb) { return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS; @@ -1550,10 +1564,7 @@ static struct sk_buff *__first_packet_length(struct sock *sk, *total += skb->truesize; kfree_skb(skb); } else { - /* the csum related bits could be changed, refresh - * the scratch area - */ - udp_set_dev_scratch(skb); + udp_skb_csum_unnecessary_set(skb); break; } } -- cgit v1.2.3 From 20eb4f29b60286e0d6dc01d9c260b4bd383c58fb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Oct 2019 13:50:27 -0700 Subject: net: fix sk_page_frag() recursion from memory reclaim sk_page_frag() optimizes skb_frag allocations by using per-task skb_frag cache when it knows it's the only user. The condition is determined by seeing whether the socket allocation mask allows blocking - if the allocation may block, it obviously owns the task's context and ergo exclusively owns current->task_frag. Unfortunately, this misses recursion through memory reclaim path. Please take a look at the following backtrace. [2] RIP: 0010:tcp_sendmsg_locked+0xccf/0xe10 ... 
tcp_sendmsg+0x27/0x40 sock_sendmsg+0x30/0x40 sock_xmit.isra.24+0xa1/0x170 [nbd] nbd_send_cmd+0x1d2/0x690 [nbd] nbd_queue_rq+0x1b5/0x3b0 [nbd] __blk_mq_try_issue_directly+0x108/0x1b0 blk_mq_request_issue_directly+0xbd/0xe0 blk_mq_try_issue_list_directly+0x41/0xb0 blk_mq_sched_insert_requests+0xa2/0xe0 blk_mq_flush_plug_list+0x205/0x2a0 blk_flush_plug_list+0xc3/0xf0 [1] blk_finish_plug+0x21/0x2e _xfs_buf_ioapply+0x313/0x460 __xfs_buf_submit+0x67/0x220 xfs_buf_read_map+0x113/0x1a0 xfs_trans_read_buf_map+0xbf/0x330 xfs_btree_read_buf_block.constprop.42+0x95/0xd0 xfs_btree_lookup_get_block+0x95/0x170 xfs_btree_lookup+0xcc/0x470 xfs_bmap_del_extent_real+0x254/0x9a0 __xfs_bunmapi+0x45c/0xab0 xfs_bunmapi+0x15/0x30 xfs_itruncate_extents_flags+0xca/0x250 xfs_free_eofblocks+0x181/0x1e0 xfs_fs_destroy_inode+0xa8/0x1b0 destroy_inode+0x38/0x70 dispose_list+0x35/0x50 prune_icache_sb+0x52/0x70 super_cache_scan+0x120/0x1a0 do_shrink_slab+0x120/0x290 shrink_slab+0x216/0x2b0 shrink_node+0x1b6/0x4a0 do_try_to_free_pages+0xc6/0x370 try_to_free_mem_cgroup_pages+0xe3/0x1e0 try_charge+0x29e/0x790 mem_cgroup_charge_skmem+0x6a/0x100 __sk_mem_raise_allocated+0x18e/0x390 __sk_mem_schedule+0x2a/0x40 [0] tcp_sendmsg_locked+0x8eb/0xe10 tcp_sendmsg+0x27/0x40 sock_sendmsg+0x30/0x40 ___sys_sendmsg+0x26d/0x2b0 __sys_sendmsg+0x57/0xa0 do_syscall_64+0x42/0x100 entry_SYSCALL_64_after_hwframe+0x44/0xa9 In [0], tcp_sendmsg_locked() was using current->page_frag when it called sk_wmem_schedule(). It already calculated how many bytes can be fit into current->page_frag. Due to memory pressure, sk_wmem_schedule() called into memory reclaim path which called into xfs and then IO issue path. Because the filesystem in question is backed by nbd, the control goes back into the tcp layer - back into tcp_sendmsg_locked(). nbd sets sk_allocation to (GFP_NOIO | __GFP_MEMALLOC) which makes sense - it's in the process of freeing memory and wants to be able to, e.g., drop clean pages to make forward progress.
However, this confused sk_page_frag() called from [2]. Because it only tests whether the allocation allows blocking which it does, it now thinks current->page_frag can be used again although it already was being used in [0]. After [2] used current->page_frag, the offset would be increased by the used amount. When the control returns to [0], current->page_frag's offset is increased and the previously calculated number of bytes now may overrun the end of allocated memory leading to silent memory corruptions. Fix it by adding gfpflags_normal_context() which tests sleepable && !reclaim and use it to determine whether to use current->task_frag. v2: Eric didn't like gfp flags being tested twice. Introduce a new helper gfpflags_normal_context() and combine the two tests. Signed-off-by: Tejun Heo Cc: Josef Bacik Cc: Eric Dumazet Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/gfp.h | 23 +++++++++++++++++++++++ include/net/sock.h | 11 ++++++++--- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index fb07b503dc45..61f2f6ff9467 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -325,6 +325,29 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) return !!(gfp_flags & __GFP_DIRECT_RECLAIM); } +/** + * gfpflags_normal_context - is gfp_flags a normal sleepable context? + * @gfp_flags: gfp_flags to test + * + * Test whether @gfp_flags indicates that the allocation is from the + * %current context and allowed to sleep. + * + * An allocation being allowed to block doesn't mean it owns the %current + * context. When direct reclaim path tries to allocate memory, the + * allocation context is nested inside whatever %current was doing at the + * time of the original allocation. The nested allocation may be allowed + * to block but modifying anything %current owns can corrupt the outer + * context's expectations. 
+ * + * %true result from this function indicates that the allocation context + * can sleep and use anything that's associated with %current. + */ +static inline bool gfpflags_normal_context(const gfp_t gfp_flags) +{ + return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) == + __GFP_DIRECT_RECLAIM; +} + #ifdef CONFIG_HIGHMEM #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM #else diff --git a/include/net/sock.h b/include/net/sock.h index f69b58bff7e5..c31a9ed86d5a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2242,12 +2242,17 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, * sk_page_frag - return an appropriate page_frag * @sk: socket * - * If socket allocation mode allows current thread to sleep, it means its - * safe to use the per task page_frag instead of the per socket one. + * Use the per task page_frag instead of the per socket one for + * optimization when we know that we're in the normal context and owns + * everything that's associated with %current. + * + * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest + * inside other socket operations and end up recursing into sk_page_frag() + * while it's already in use. */ static inline struct page_frag *sk_page_frag(struct sock *sk) { - if (gfpflags_allow_blocking(sk->sk_allocation)) + if (gfpflags_normal_context(sk->sk_allocation)) return &current->task_frag; return &sk->sk_frag; -- cgit v1.2.3 From 88824e3bf29a2fcacfd9ebbfe03063649f0f3254 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 25 Oct 2019 13:47:24 +1100 Subject: net: ethernet: ftgmac100: Fix DMA coherency issue with SW checksum We are calling the checksum helper after the dma_map_single() call to map the packet. This is incorrect as the checksumming code will touch the packet from the CPU. This means the cache won't be properly flushed (or the bounce buffering will leave us with the unmodified packet to DMA).
This moves the calculation of the checksum & vlan tags to before the DMA mapping. This also has the side effect of fixing another bug: If the checksum helper fails, we goto "drop" to drop the packet, which will not unmap the DMA mapping. Signed-off-by: Benjamin Herrenschmidt Fixes: 05690d633f30 ("ftgmac100: Upgrade to NETIF_F_HW_CSUM") Reviewed-by: Vijay Khemka Tested-by: Vijay Khemka Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 9b7af94a40bb..96e9565f1e08 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -727,6 +727,18 @@ static netdev_tx_t ftgmac100_hard_start_xmit(struct sk_buff *skb, */ nfrags = skb_shinfo(skb)->nr_frags; + /* Setup HW checksumming */ + csum_vlan = 0; + if (skb->ip_summed == CHECKSUM_PARTIAL && + !ftgmac100_prep_tx_csum(skb, &csum_vlan)) + goto drop; + + /* Add VLAN tag */ + if (skb_vlan_tag_present(skb)) { + csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG; + csum_vlan |= skb_vlan_tag_get(skb) & 0xffff; + } + /* Get header len */ len = skb_headlen(skb); @@ -753,19 +765,6 @@ static netdev_tx_t ftgmac100_hard_start_xmit(struct sk_buff *skb, if (nfrags == 0) f_ctl_stat |= FTGMAC100_TXDES0_LTS; txdes->txdes3 = cpu_to_le32(map); - - /* Setup HW checksumming */ - csum_vlan = 0; - if (skb->ip_summed == CHECKSUM_PARTIAL && - !ftgmac100_prep_tx_csum(skb, &csum_vlan)) - goto drop; - - /* Add VLAN tag */ - if (skb_vlan_tag_present(skb)) { - csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG; - csum_vlan |= skb_vlan_tag_get(skb) & 0xffff; - } - txdes->txdes1 = cpu_to_le32(csum_vlan); /* Next descriptor */ -- cgit v1.2.3 From 5d294fc483405de9c0913ab744a31e6fa7cb0f40 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 Oct 2019 09:26:35 +0200 Subject: net: dsa: sja1105: improve NET_DSA_SJA1105_TAS 
dependency An earlier bugfix introduced a dependency on CONFIG_NET_SCH_TAPRIO, but this missed the case of NET_SCH_TAPRIO=m and NET_DSA_SJA1105=y, which still causes a link error: drivers/net/dsa/sja1105/sja1105_tas.o: In function `sja1105_setup_tc_taprio': sja1105_tas.c:(.text+0x5c): undefined reference to `taprio_offload_free' sja1105_tas.c:(.text+0x3b4): undefined reference to `taprio_offload_get' drivers/net/dsa/sja1105/sja1105_tas.o: In function `sja1105_tas_teardown': sja1105_tas.c:(.text+0x6ec): undefined reference to `taprio_offload_free' Change the dependency to only allow selecting the TAS code when it can link against the taprio code. Fixes: a8d570de0cc6 ("net: dsa: sja1105: Add dependency for NET_DSA_SJA1105_TAS") Fixes: 317ab5b86c8e ("net: dsa: sja1105: Configure the Time-Aware Scheduler via tc-taprio offload") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/sja1105/Kconfig b/drivers/net/dsa/sja1105/Kconfig index f40b248f0b23..ffac0ea4e8d5 100644 --- a/drivers/net/dsa/sja1105/Kconfig +++ b/drivers/net/dsa/sja1105/Kconfig @@ -26,8 +26,8 @@ config NET_DSA_SJA1105_PTP config NET_DSA_SJA1105_TAS bool "Support for the Time-Aware Scheduler on NXP SJA1105" - depends on NET_DSA_SJA1105 - depends on NET_SCH_TAPRIO + depends on NET_DSA_SJA1105 && NET_SCH_TAPRIO + depends on NET_SCH_TAPRIO=y || NET_DSA_SJA1105=m help This enables support for the TTEthernet-based egress scheduling engine in the SJA1105 DSA driver, which is controlled using a -- cgit v1.2.3 From 0a29ac5bd3a988dc151c8d26910dec2557421f64 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Fri, 25 Oct 2019 10:04:13 +0200 Subject: net: usb: lan78xx: Disable interrupts before calling generic_handle_irq() lan78xx_status() will run with interrupts enabled due to the change in ed194d136769 ("usb: core: remove local_irq_save() around ->complete() handler"). 
generic_handle_irq() expects to be run with IRQs disabled. [ 4.886203] 000: irq 79 handler irq_default_primary_handler+0x0/0x8 enabled interrupts [ 4.886243] 000: WARNING: CPU: 0 PID: 0 at kernel/irq/handle.c:152 __handle_irq_event_percpu+0x154/0x168 [ 4.896294] 000: Modules linked in: [ 4.896301] 000: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.3.6 #39 [ 4.896310] 000: Hardware name: Raspberry Pi 3 Model B+ (DT) [ 4.896315] 000: pstate: 60000005 (nZCv daif -PAN -UAO) [ 4.896321] 000: pc : __handle_irq_event_percpu+0x154/0x168 [ 4.896331] 000: lr : __handle_irq_event_percpu+0x154/0x168 [ 4.896339] 000: sp : ffff000010003cc0 [ 4.896346] 000: x29: ffff000010003cc0 x28: 0000000000000060 [ 4.896355] 000: x27: ffff000011021980 x26: ffff00001189c72b [ 4.896364] 000: x25: ffff000011702bc0 x24: ffff800036d6e400 [ 4.896373] 000: x23: 000000000000004f x22: ffff000010003d64 [ 4.896381] 000: x21: 0000000000000000 x20: 0000000000000002 [ 4.896390] 000: x19: ffff8000371c8480 x18: 0000000000000060 [ 4.896398] 000: x17: 0000000000000000 x16: 00000000000000eb [ 4.896406] 000: x15: ffff000011712d18 x14: 7265746e69206465 [ 4.896414] 000: x13: ffff000010003ba0 x12: ffff000011712df0 [ 4.896422] 000: x11: 0000000000000001 x10: ffff000011712e08 [ 4.896430] 000: x9 : 0000000000000001 x8 : 000000000003c920 [ 4.896437] 000: x7 : ffff0000118cc410 x6 : ffff0000118c7f00 [ 4.896445] 000: x5 : 000000000003c920 x4 : 0000000000004510 [ 4.896453] 000: x3 : ffff000011712dc8 x2 : 0000000000000000 [ 4.896461] 000: x1 : 73a3f67df94c1500 x0 : 0000000000000000 [ 4.896466] 000: Call trace: [ 4.896471] 000: __handle_irq_event_percpu+0x154/0x168 [ 4.896481] 000: handle_irq_event_percpu+0x50/0xb0 [ 4.896489] 000: handle_irq_event+0x40/0x98 [ 4.896497] 000: handle_simple_irq+0xa4/0xf0 [ 4.896505] 000: generic_handle_irq+0x24/0x38 [ 4.896513] 000: intr_complete+0xb0/0xe0 [ 4.896525] 000: __usb_hcd_giveback_urb+0x58/0xd8 [ 4.896533] 000: usb_giveback_urb_bh+0xd0/0x170 [ 4.896539] 000: 
tasklet_action_common.isra.0+0x9c/0x128 [ 4.896549] 000: tasklet_hi_action+0x24/0x30 [ 4.896556] 000: __do_softirq+0x120/0x23c [ 4.896564] 000: irq_exit+0xb8/0xd8 [ 4.896571] 000: __handle_domain_irq+0x64/0xb8 [ 4.896579] 000: bcm2836_arm_irqchip_handle_irq+0x60/0xc0 [ 4.896586] 000: el1_irq+0xb8/0x140 [ 4.896592] 000: arch_cpu_idle+0x10/0x18 [ 4.896601] 000: do_idle+0x200/0x280 [ 4.896608] 000: cpu_startup_entry+0x20/0x28 [ 4.896615] 000: rest_init+0xb4/0xc0 [ 4.896623] 000: arch_call_rest_init+0xc/0x14 [ 4.896632] 000: start_kernel+0x454/0x480 Fixes: ed194d136769 ("usb: core: remove local_irq_save() around ->complete() handler") Cc: Woojung Huh Cc: Marc Zyngier Cc: Andrew Lunn Cc: Stefan Wahren Cc: Jisheng Zhang Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: David Miller Signed-off-by: Daniel Wagner Tested-by: Stefan Wahren Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 62948098191f..f24a1b0b801f 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1264,8 +1264,11 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb) netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata); lan78xx_defer_kevent(dev, EVENT_LINK_RESET); - if (dev->domain_data.phyirq > 0) + if (dev->domain_data.phyirq > 0) { + local_irq_disable(); generic_handle_irq(dev->domain_data.phyirq); + local_irq_enable(); + } } else netdev_warn(dev->net, "unexpected interrupt: 0x%08x\n", intdata); -- cgit v1.2.3 From 427400fc5c1988245827bacb0dfba0214f153a2f Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Wed, 16 Oct 2019 23:43:19 +1030 Subject: clk: ast2600: Fix enabling of clocks The struct clk_ops enable callback for the aspeed gates mixes up the set to clear and write to set registers. 
Fixes: d3d04f6c330a ("clk: Add support for AST2600 SoC") Reviewed-by: Andrew Jeffery Signed-off-by: Joel Stanley Link: https://lkml.kernel.org/r/20191016131319.31318-1-joel@jms.id.au Signed-off-by: Stephen Boyd --- drivers/clk/clk-ast2600.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/clk/clk-ast2600.c b/drivers/clk/clk-ast2600.c index 1c1bb39bb04e..b1318e6b655b 100644 --- a/drivers/clk/clk-ast2600.c +++ b/drivers/clk/clk-ast2600.c @@ -266,10 +266,11 @@ static int aspeed_g6_clk_enable(struct clk_hw *hw) /* Enable clock */ if (gate->flags & CLK_GATE_SET_TO_DISABLE) { - regmap_write(gate->map, get_clock_reg(gate), clk); - } else { - /* Use set to clear register */ + /* Clock is clear to enable, so use set to clear register */ regmap_write(gate->map, get_clock_reg(gate) + 0x04, clk); + } else { + /* Clock is set to enable, so use write to set register */ + regmap_write(gate->map, get_clock_reg(gate), clk); } if (gate->reset_idx >= 0) { -- cgit v1.2.3 From 2ccb4f16d013a0954459061d38172b1c53553ba6 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 21 Oct 2019 17:59:22 -0700 Subject: hwmon: (ina3221) Fix read timeout issue After introducing "samples" to the calculation of wait time, the driver might timeout at the regmap_field_read_poll_timeout call, because the wait time could be longer than the 100000 usec limit due to a large "samples" number. So this patch sets the timeout limit to 2 times of the wait time in order to fix this issue. 
Fixes: 5c090abf945b ("hwmon: (ina3221) Add averaging mode support") Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20191022005922.30239-1-nicoleotsuka@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/ina3221.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c index 0037e2bdacd6..8a51dcf055ea 100644 --- a/drivers/hwmon/ina3221.c +++ b/drivers/hwmon/ina3221.c @@ -170,7 +170,7 @@ static inline int ina3221_wait_for_data(struct ina3221_data *ina) /* Polling the CVRF bit to make sure read data is ready */ return regmap_field_read_poll_timeout(ina->fields[F_CVRF], - cvrf, cvrf, wait, 100000); + cvrf, cvrf, wait, wait * 2); } static int ina3221_read_value(struct ina3221_data *ina, unsigned int reg, -- cgit v1.2.3 From d3566abb1a1e7772116e4d50fb6a58d19c9802e5 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 24 Oct 2019 16:38:04 +1000 Subject: scsi: qla2xxx: stop timer in shutdown path In shutdown/reboot paths, the timer is not stopped: qla2x00_shutdown pci_device_shutdown device_shutdown kernel_restart_prepare kernel_restart sys_reboot This causes lockups (on powerpc) when firmware config space access calls are interrupted by smp_send_stop later in reboot. Fixes: e30d1756480dc ("[SCSI] qla2xxx: Addition of shutdown callback handler.") Link: https://lore.kernel.org/r/20191024063804.14538-1-npiggin@gmail.com Signed-off-by: Nicholas Piggin Acked-by: Himanshu Madhani Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qla2xxx/qla_os.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index e4d765fc03ea..39f7782a133b 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3529,6 +3529,10 @@ qla2x00_shutdown(struct pci_dev *pdev) qla2x00_try_to_stop_firmware(vha); } + /* Disable timer */ + if (vha->timer_active) + qla2x00_stop_timer(vha); + /* Turn adapter off line */ vha->flags.online = 0; -- cgit v1.2.3 From 05d9a952832cb206a32e3705eff6edebdb2207e7 Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Wed, 11 Sep 2019 13:34:33 -0300 Subject: powerpc/prom_init: Undo relocation before entering secure mode The ultravisor will do an integrity check of the kernel image but we relocated it so the check will fail. Restore the original image by relocating it back to the kernel virtual base address. This works because during build vmlinux is linked with an expected virtual runtime address of KERNELBASE. 
Fixes: 6a9c930bd775 ("powerpc/prom_init: Add the ESM call to prom_init") Signed-off-by: Thiago Jung Bauermann Tested-by: Michael Anderson [mpe: Add IS_ENABLED() to fix the CONFIG_RELOCATABLE=n build] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190911163433.12822-1-bauerman@linux.ibm.com --- arch/powerpc/include/asm/elf.h | 3 +++ arch/powerpc/kernel/prom_init.c | 13 +++++++++++++ arch/powerpc/kernel/prom_init_check.sh | 3 ++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 409c9bfb43d9..57c229a86f08 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -175,4 +175,7 @@ do { \ ARCH_DLINFO_CACHE_GEOMETRY; \ } while (0) +/* Relocate the kernel image to @final_address */ +void relocate(unsigned long final_address); + #endif /* _ASM_POWERPC_ELF_H */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a4e7762dd286..100f1b57ec2f 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -3249,7 +3249,20 @@ static void setup_secure_guest(unsigned long kbase, unsigned long fdt) /* Switch to secure mode. */ prom_printf("Switching to secure mode.\n"); + /* + * The ultravisor will do an integrity check of the kernel image but we + * relocated it so the check will fail. Restore the original image by + * relocating it back to the kernel virtual base address. + */ + if (IS_ENABLED(CONFIG_RELOCATABLE)) + relocate(KERNELBASE); + ret = enter_secure_mode(kbase, fdt); + + /* Relocate the kernel again. 
*/ + if (IS_ENABLED(CONFIG_RELOCATABLE)) + relocate(kbase); + if (ret != U_SUCCESS) { prom_printf("Returned %d from switching to secure mode.\n", ret); prom_rtas_os_term("Switch to secure mode failed.\n"); diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 78bab17b1396..b183ab9c5107 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -26,7 +26,8 @@ _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold __secondary_hold_acknowledge __secondary_hold_spinloop __start logo_linux_clut224 btext_prepare_BAT reloc_got2 kernstart_addr memstart_addr linux_banner _stext -__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC." +__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC. +relocate" NM="$1" OBJ="$2" -- cgit v1.2.3 From afdc74ed2d57e86c10b1d6831339770a802bab9a Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 22 Oct 2019 09:50:54 -0700 Subject: clk: sunxi: Fix operator precedence in sunxi_divs_clk_setup r375326 in Clang exposes an issue with operator precedence in sunxi_divs_clk_setup: drivers/clk/sunxi/clk-sunxi.c:1083:30: warning: operator '?:' has lower precedence than '|'; '|' will be evaluated first [-Wbitwise-conditional-parentheses] data->div[i].critical ? ~~~~~~~~~~~~~~~~~~~~~ ^ drivers/clk/sunxi/clk-sunxi.c:1083:30: note: place parentheses around the '|' expression to silence this warning data->div[i].critical ? ^ ) drivers/clk/sunxi/clk-sunxi.c:1083:30: note: place parentheses around the '?:' expression to evaluate it first data->div[i].critical ? ^ ( 1 warning generated. It appears that the intention was for ?: to be evaluated first so that CLK_IS_CRITICAL could be added to clkflags if the critical boolean was set; right now, | is being evaluated first. Add parentheses around the ?: block to have it be evaluated first.
Fixes: 9919d44ff297 ("clk: sunxi: Use CLK_IS_CRITICAL flag for critical clks") Link: https://github.com/ClangBuiltLinux/linux/issues/745 Signed-off-by: Nathan Chancellor Signed-off-by: Maxime Ripard --- drivers/clk/sunxi/clk-sunxi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c index d3a43381a792..27201fd26e44 100644 --- a/drivers/clk/sunxi/clk-sunxi.c +++ b/drivers/clk/sunxi/clk-sunxi.c @@ -1080,8 +1080,8 @@ static struct clk ** __init sunxi_divs_clk_setup(struct device_node *node, rate_hw, rate_ops, gate_hw, &clk_gate_ops, clkflags | - data->div[i].critical ? - CLK_IS_CRITICAL : 0); + (data->div[i].critical ? + CLK_IS_CRITICAL : 0)); WARN_ON(IS_ERR(clk_data->clks[i])); } -- cgit v1.2.3 From cdfc2e2086bf9c465f44e2db25561373b084a113 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 23 Oct 2019 12:28:09 +0100 Subject: clk: sunxi-ng: a80: fix the zero'ing of bits 16 and 18 The zero'ing of bits 16 and 18 is incorrect. Currently the code is masking with the bitwise-and of BIT(16) & BIT(18) which is 0, so the updated value for val is always zero. Fix this by bitwise and-ing value with the correct mask that will zero bits 16 and 18. 
Addresses-Coverity: (" Suspicious &= or |= constant expression") Fixes: b8eb71dcdd08 ("clk: sunxi-ng: Add A80 CCU") Signed-off-by: Colin Ian King Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu-sun9i-a80.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c index dcac1391767f..ef29582676f6 100644 --- a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c +++ b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c @@ -1224,7 +1224,7 @@ static int sun9i_a80_ccu_probe(struct platform_device *pdev) /* Enforce d1 = 0, d2 = 0 for Audio PLL */ val = readl(reg + SUN9I_A80_PLL_AUDIO_REG); - val &= (BIT(16) & BIT(18)); + val &= ~(BIT(16) | BIT(18)); writel(val, reg + SUN9I_A80_PLL_AUDIO_REG); /* Enforce P = 1 for both CPU cluster PLLs */ -- cgit v1.2.3 From e614f341253f8541baf0230a8dc6a016b544b1e2 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Mon, 28 Oct 2019 22:58:58 +0100 Subject: ARM: dts: sun8i-a83t-tbs-a711: Fix WiFi resume from suspend Without enabling keep-power-in-suspend, we can't wake the device up using WOL packet, and the log is flooded with these messages on resume: sunxi-mmc 1c10000.mmc: send stop command failed sunxi-mmc 1c10000.mmc: data error, sending stop command sunxi-mmc 1c10000.mmc: send stop command failed sunxi-mmc 1c10000.mmc: data error, sending stop command So to make the WiFi really a wakeup-source, we need to keep it powered during suspend. 
Fixes: 0e23372080def7 ("arm: dts: sun8i: Add the TBS A711 tablet devicetree") Signed-off-by: Ondrej Jirman Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts index 568b90ece342..3bec3e0a81b2 100644 --- a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts +++ b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts @@ -192,6 +192,7 @@ vqmmc-supply = <&reg_dldo1>; non-removable; wakeup-source; + keep-power-in-suspend; status = "okay"; brcmf: wifi@1 { -- cgit v1.2.3 From e690053e97e7a9c968df9a97cef9089dfa8e6a44 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Mon, 28 Oct 2019 22:49:14 +0100 Subject: ARM: sunxi: Fix CPU powerdown on A83T PRCM_PWROFF_GATING_REG has CPU0 at bit 4 on A83T. So without this patch, instead of gating the CPU0, the whole cluster was power gated, when shutting down first CPU in the cluster. Fixes: 6961275e72a8c1 ("ARM: sun8i: smp: Add support for A83T") Signed-off-by: Ondrej Jirman Acked-by: Chen-Yu Tsai Cc: stable@vger.kernel.org Signed-off-by: Maxime Ripard --- arch/arm/mach-sunxi/mc_smp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-sunxi/mc_smp.c b/arch/arm/mach-sunxi/mc_smp.c index 239084cf8192..26cbce135338 100644 --- a/arch/arm/mach-sunxi/mc_smp.c +++ b/arch/arm/mach-sunxi/mc_smp.c @@ -481,14 +481,18 @@ static void sunxi_mc_smp_cpu_die(unsigned int l_cpu) static int sunxi_cpu_powerdown(unsigned int cpu, unsigned int cluster) { u32 reg; + int gating_bit = cpu; pr_debug("%s: cluster %u cpu %u\n", __func__, cluster, cpu); if (cpu >= SUNXI_CPUS_PER_CLUSTER || cluster >= SUNXI_NR_CLUSTERS) return -EINVAL; + if (is_a83t && cpu == 0) + gating_bit = 4; + /* gate processor power */ reg = readl(prcm_base + PRCM_PWROFF_GATING_REG(cluster)); - reg |= PRCM_PWROFF_GATING_REG_CORE(cpu); + reg |= PRCM_PWROFF_GATING_REG_CORE(gating_bit); writel(reg, prcm_base +
PRCM_PWROFF_GATING_REG(cluster)); udelay(20); -- cgit v1.2.3 From 54f83b8c8ea9b22082a496deadf90447a326954e Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 28 Oct 2019 10:54:26 -0400 Subject: USB: gadget: Reject endpoints with 0 maxpacket value Endpoints with a maxpacket length of 0 are probably useless. They can't transfer any data, and it's not at all unlikely that a UDC will crash or hang when trying to handle a non-zero-length usb_request for such an endpoint. Indeed, dummy-hcd gets a divide error when trying to calculate the remainder of a transfer length by the maxpacket value, as discovered by the syzbot fuzzer. Currently the gadget core does not check for endpoints having a maxpacket value of 0. This patch adds a check to usb_ep_enable(), preventing such endpoints from being used. As far as I know, none of the gadget drivers in the kernel tries to create an endpoint with maxpacket = 0, but until now there has been nothing to prevent userspace programs under gadgetfs or configfs from doing it. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+8ab8bf161038a8768553@syzkaller.appspotmail.com CC: Acked-by: Felipe Balbi Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.1910281052370.1485-100000@iolanthe.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 92af8dc98c3d..51fa614b4079 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -98,6 +98,17 @@ int usb_ep_enable(struct usb_ep *ep) if (ep->enabled) goto out; + /* UDC drivers can't handle endpoints with maxpacket size 0 */ + if (usb_endpoint_maxp(ep->desc) == 0) { + /* + * We should log an error message here, but we can't call + * dev_err() because there's no way to find the gadget + * given only ep. 
+ */ + ret = -EINVAL; + goto out; + } + ret = ep->ops->enable(ep, ep->desc); if (ret) goto out; -- cgit v1.2.3 From cd1cb3350561d2bf544ddfef76fbf0b1c9c7178f Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 23 Oct 2019 16:37:44 +0100 Subject: sched/topology: Don't try to build empty sched domains Turns out hotplugging CPUs that are in exclusive cpusets can lead to the cpuset code feeding empty cpumasks to the sched domain rebuild machinery. This leads to the following splat: Internal error: Oops: 96000004 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 235 Comm: kworker/5:2 Not tainted 5.4.0-rc1-00005-g8d495477d62e #23 Hardware name: ARM Juno development board (r0) (DT) Workqueue: events cpuset_hotplug_workfn pstate: 60000005 (nZCv daif -PAN -UAO) pc : build_sched_domains (./include/linux/arch_topology.h:23 kernel/sched/topology.c:1898 kernel/sched/topology.c:1969) lr : build_sched_domains (kernel/sched/topology.c:1966) Call trace: build_sched_domains (./include/linux/arch_topology.h:23 kernel/sched/topology.c:1898 kernel/sched/topology.c:1969) partition_sched_domains_locked (kernel/sched/topology.c:2250) rebuild_sched_domains_locked (./include/linux/bitmap.h:370 ./include/linux/cpumask.h:538 kernel/cgroup/cpuset.c:955 kernel/cgroup/cpuset.c:978 kernel/cgroup/cpuset.c:1019) rebuild_sched_domains (kernel/cgroup/cpuset.c:1032) cpuset_hotplug_workfn (kernel/cgroup/cpuset.c:3205 (discriminator 2)) process_one_work (./arch/arm64/include/asm/jump_label.h:21 ./include/linux/jump_label.h:200 ./include/trace/events/workqueue.h:114 kernel/workqueue.c:2274) worker_thread (./include/linux/compiler.h:199 ./include/linux/list.h:268 kernel/workqueue.c:2416) kthread (kernel/kthread.c:255) ret_from_fork (arch/arm64/kernel/entry.S:1167) Code: f860dae2 912802d6 aa1603e1 12800000 (f8616853) The faulty line in question is: cap = arch_scale_cpu_capacity(cpumask_first(cpu_map)); and we're not checking the return value against nr_cpu_ids (we shouldn't have to!), which leads to 
the above. Prevent generate_sched_domains() from returning empty cpumasks, and add some assertion in build_sched_domains() to scream bloody murder if it happens again. The above splat was obtained on my Juno r0 with the following reproducer: $ cgcreate -g cpuset:asym $ cgset -r cpuset.cpus=0-3 asym $ cgset -r cpuset.mems=0 asym $ cgset -r cpuset.cpu_exclusive=1 asym $ cgcreate -g cpuset:smp $ cgset -r cpuset.cpus=4-5 smp $ cgset -r cpuset.mems=0 smp $ cgset -r cpuset.cpu_exclusive=1 smp $ cgset -r cpuset.sched_load_balance=0 . $ echo 0 > /sys/devices/system/cpu/cpu4/online $ echo 0 > /sys/devices/system/cpu/cpu5/online Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Cc: Dietmar.Eggemann@arm.com Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: hannes@cmpxchg.org Cc: lizefan@huawei.com Cc: morten.rasmussen@arm.com Cc: qperret@google.com Cc: tj@kernel.org Cc: vincent.guittot@linaro.org Fixes: 05484e098448 ("sched/topology: Add SD_ASYM_CPUCAPACITY flag detection") Link: https://lkml.kernel.org/r/20191023153745.19515-2-valentin.schneider@arm.com Signed-off-by: Ingo Molnar --- kernel/cgroup/cpuset.c | 3 ++- kernel/sched/topology.c | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index c52bc91f882b..c87ee6412b36 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -798,7 +798,8 @@ static int generate_sched_domains(cpumask_var_t **domains, cpumask_subset(cp->cpus_allowed, top_cpuset.effective_cpus)) continue; - if (is_sched_load_balance(cp)) + if (is_sched_load_balance(cp) && + !cpumask_empty(cp->effective_cpus)) csa[csn++] = cp; /* skip @cp's subtree if not a partition root */ diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index b5667a273bf6..9318acf1d1fe 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1948,7 +1948,7 @@ next_level: static int build_sched_domains(const struct cpumask *cpu_map, struct 
sched_domain_attr *attr) { - enum s_alloc alloc_state; + enum s_alloc alloc_state = sa_none; struct sched_domain *sd; struct s_data d; struct rq *rq = NULL; @@ -1956,6 +1956,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att struct sched_domain_topology_level *tl_asym; bool has_asym = false; + if (WARN_ON(cpumask_empty(cpu_map))) + goto error; + alloc_state = __visit_domain_allocation_hell(&d, cpu_map); if (alloc_state != sa_rootdomain) goto error; -- cgit v1.2.3 From e284df705cf1eeedb5ec3a66ed82d17a64659150 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 23 Oct 2019 16:37:45 +0100 Subject: sched/topology: Allow sched_asym_cpucapacity to be disabled While the static key is correctly initialized as being disabled, it will remain forever enabled once turned on. This means that if we start with an asymmetric system and hotplug out enough CPUs to end up with an SMP system, the static key will remain set - which is obviously wrong. We should detect this and turn off things like misfit migration and capacity aware wakeups. As Quentin pointed out, having separate root domains makes this slightly trickier. We could have exclusive cpusets that create an SMP island - IOW, the domains within this root domain will not see any asymmetry. This means we can't just disable the key on domain destruction, we need to count how many asymmetric root domains we have. Consider the following example using Juno r0 which is 2+4 big.LITTLE, where two identical cpusets are created: they both span both big and LITTLE CPUs: asym0 asym1 [ ][ ] L L B L L B $ cgcreate -g cpuset:asym0 $ cgset -r cpuset.cpus=0,1,3 asym0 $ cgset -r cpuset.mems=0 asym0 $ cgset -r cpuset.cpu_exclusive=1 asym0 $ cgcreate -g cpuset:asym1 $ cgset -r cpuset.cpus=2,4,5 asym1 $ cgset -r cpuset.mems=0 asym1 $ cgset -r cpuset.cpu_exclusive=1 asym1 $ cgset -r cpuset.sched_load_balance=0 . 
(the CPU numbering may look odd because on the Juno LITTLEs are CPUs 0,3-5 and bigs are CPUs 1-2) If we make one of those SMP (IOW remove asymmetry) by e.g. hotplugging its big core, we would end up with an SMP cpuset and an asymmetric cpuset - the static key must remain set, because we still have one asymmetric root domain. With the above example, this could be done with: $ echo 0 > /sys/devices/system/cpu/cpu2/online Which would result in: asym0 asym1 [ ][ ] L L B L L When both SMP and asymmetric cpusets are present, all CPUs will observe sched_asym_cpucapacity being set (it is system-wide), but not all CPUs observe asymmetry in their sched domain hierarchy: per_cpu(sd_asym_cpucapacity, ) == per_cpu(sd_asym_cpucapacity, ) == NULL Change the simple key enablement to an increment, and decrement the key counter when destroying domains that cover asymmetric CPUs. Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dietmar Eggemann Cc: Dietmar.Eggemann@arm.com Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: hannes@cmpxchg.org Cc: lizefan@huawei.com Cc: morten.rasmussen@arm.com Cc: qperret@google.com Cc: tj@kernel.org Cc: vincent.guittot@linaro.org Fixes: df054e8445a4 ("sched/topology: Add static_key for asymmetric CPU capacity optimizations") Link: https://lkml.kernel.org/r/20191023153745.19515-3-valentin.schneider@arm.com Signed-off-by: Ingo Molnar --- kernel/sched/topology.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 9318acf1d1fe..49b835f1305f 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -2029,7 +2029,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att rcu_read_unlock(); if (has_asym) - static_branch_enable_cpuslocked(&sched_asym_cpucapacity); + static_branch_inc_cpuslocked(&sched_asym_cpucapacity); if (rq && sched_debug_enabled) { pr_info("root domain span: %*pbl (max 
cpu_capacity = %lu)\n", @@ -2124,8 +2124,12 @@ int sched_init_domains(const struct cpumask *cpu_map) */ static void detach_destroy_domains(const struct cpumask *cpu_map) { + unsigned int cpu = cpumask_any(cpu_map); int i; + if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu))) + static_branch_dec_cpuslocked(&sched_asym_cpucapacity); + rcu_read_lock(); for_each_cpu(i, cpu_map) cpu_attach_domain(NULL, &def_root_domain, i); -- cgit v1.2.3 From 7d6475051fb3d9339c5c760ed9883bc0a9048b21 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Oct 2019 21:58:14 +1000 Subject: powerpc/powernv: Fix CPU idle to be called with IRQs disabled Commit e78a7614f3876 ("idle: Prevent late-arriving interrupts from disrupting offline") changes arch_cpu_idle_dead to be called with interrupts disabled, which triggers the WARN in pnv_smp_cpu_kill_self. Fix this by fixing up irq_happened after hard disabling, rather than requiring there are no pending interrupts, similarly to what was done until commit 2525db04d1cc5 ("powerpc/powernv: Simplify lazy IRQ handling in CPU offline").
Fixes: e78a7614f3876 ("idle: Prevent late-arriving interrupts from disrupting offline") Reported-by: Paul Mackerras Signed-off-by: Nicholas Piggin [mpe: Add unexpected_mask rather than checking for known bad values, change the WARN_ON() to a WARN_ON_ONCE()] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191022115814.22456-1-npiggin@gmail.com --- arch/powerpc/platforms/powernv/smp.c | 53 +++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index fbd6e6b7bbf2..13e251699346 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -146,20 +146,25 @@ static int pnv_smp_cpu_disable(void) return 0; } +static void pnv_flush_interrupts(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (xive_enabled()) + xive_flush_interrupt(); + else + icp_opal_flush_interrupt(); + } else { + icp_native_flush_interrupt(); + } +} + static void pnv_smp_cpu_kill_self(void) { + unsigned long srr1, unexpected_mask, wmask; unsigned int cpu; - unsigned long srr1, wmask; u64 lpcr_val; /* Standard hot unplug procedure */ - /* - * This hard disables local interurpts, ensuring we have no lazy - * irqs pending. - */ - WARN_ON(irqs_disabled()); - hard_irq_disable(); - WARN_ON(lazy_irq_pending()); idle_task_exit(); current->active_mm = NULL; /* for sanity */ @@ -172,6 +177,27 @@ static void pnv_smp_cpu_kill_self(void) if (cpu_has_feature(CPU_FTR_ARCH_207S)) wmask = SRR1_WAKEMASK_P8; + /* + * This turns the irq soft-disabled state we're called with, into a + * hard-disabled state with pending irq_happened interrupts cleared. + * + * PACA_IRQ_DEC - Decrementer should be ignored. + * PACA_IRQ_HMI - Can be ignored, processing is done in real mode. + * PACA_IRQ_DBELL, EE, PMI - Unexpected. 
+ */ + hard_irq_disable(); + if (generic_check_cpu_restart(cpu)) + goto out; + + unexpected_mask = ~(PACA_IRQ_DEC | PACA_IRQ_HMI | PACA_IRQ_HARD_DIS); + if (local_paca->irq_happened & unexpected_mask) { + if (local_paca->irq_happened & PACA_IRQ_EE) + pnv_flush_interrupts(); + DBG("CPU%d Unexpected exit while offline irq_happened=%lx!\n", + cpu, local_paca->irq_happened); + } + local_paca->irq_happened = PACA_IRQ_HARD_DIS; + /* * We don't want to take decrementer interrupts while we are * offline, so clear LPCR:PECE1. We keep PECE2 (and @@ -197,6 +223,7 @@ static void pnv_smp_cpu_kill_self(void) srr1 = pnv_cpu_offline(cpu); + WARN_ON_ONCE(!irqs_disabled()); WARN_ON(lazy_irq_pending()); /* @@ -212,13 +239,7 @@ static void pnv_smp_cpu_kill_self(void) */ if (((srr1 & wmask) == SRR1_WAKEEE) || ((srr1 & wmask) == SRR1_WAKEHVI)) { - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - if (xive_enabled()) - xive_flush_interrupt(); - else - icp_opal_flush_interrupt(); - } else - icp_native_flush_interrupt(); + pnv_flush_interrupts(); } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); @@ -266,7 +287,7 @@ static void pnv_smp_cpu_kill_self(void) */ lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1; pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val); - +out: DBG("CPU%d coming online...\n", cpu); } -- cgit v1.2.3 From 050668c10047802a2b62cbf8db834c2c84042b87 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 29 Oct 2019 15:51:03 +0100 Subject: bpf, doc: Add Andrii as official reviewer to BPF subsystem Andrii Nakryiko has been part of our weekly BPF patch review rotation for quite some time now and provided excellent and timely feedback on BPF patches, therefore give credit where credit is due and add him officially to the BPF core reviewer team to the MAINTAINERS file. 
Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/af565dbef3b0b35040f26bfd16ed59cc0bae8066.1572360528.git.daniel@iogearbox.net --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e51a68bf8ca8..808bac0e3847 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3051,6 +3051,7 @@ M: Daniel Borkmann R: Martin KaFai Lau R: Song Liu R: Yonghong Song +R: Andrii Nakryiko L: netdev@vger.kernel.org L: bpf@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git -- cgit v1.2.3 From af8fd0424713a2adb812d10d55e86718152cf656 Mon Sep 17 00:00:00 2001 From: Anton Eidelman Date: Fri, 18 Oct 2019 11:32:50 -0700 Subject: nvme-multipath: fix possible io hang after ctrl reconnect The following scenario results in an IO hang: 1) ctrl completes a request with NVME_SC_ANA_TRANSITION. NVME_NS_ANA_PENDING bit in ns->flags is set and ana_work is triggered. 2) ana_work: nvme_read_ana_log() tries to get the ANA log page from the ctrl. This fails because ctrl disconnects. Therefore nvme_update_ns_ana_state() is not called and NVME_NS_ANA_PENDING bit in ns->flags is not cleared. 3) ctrl reconnects: nvme_mpath_init(ctrl,...) calls nvme_read_ana_log(ctrl, groups_only=true). However, nvme_update_ana_state() does not update namespaces because nr_nsids = 0 (due to groups_only mode). 4) scan_work calls nvme_validate_ns(), which finds the ns and re-validates OK. Result: The ctrl is now live but NVME_NS_ANA_PENDING bit in ns->flags is still set. Consequently ctrl will never be considered a viable path by __nvme_find_path(). IO will hang if ctrl is the only or the last path to the namespace. More generally, while ctrl is reconnecting, its ANA state may change. And because nvme_mpath_init() requests ANA log in groups_only mode, these changes are not propagated to the existing ctrl namespaces. This may result in a malfunction or an IO hang.
Solution: nvme_mpath_init() will nvme_read_ana_log() with groups_only set to false. This will not harm the new ctrl case (no namespaces present), and will make sure the ANA state of namespaces gets updated after reconnect. Note: Another option would be for nvme_mpath_init() to invoke nvme_parse_ana_log(..., nvme_set_ns_ana_state) for each existing namespace. Reviewed-by: Sagi Grimberg Signed-off-by: Anton Eidelman Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/multipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 30de7efef003..d320684d25b2 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -715,7 +715,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) goto out; } - error = nvme_read_ana_log(ctrl, true); + error = nvme_read_ana_log(ctrl, false); if (error) goto out_free_ana_log_buf; return 0; -- cgit v1.2.3 From 86cccfbf773fafb88898c5627aa3727b02bc4708 Mon Sep 17 00:00:00 2001 From: Anton Eidelman Date: Fri, 18 Oct 2019 11:32:51 -0700 Subject: nvme-multipath: remove unused groups_only mode in ana log groups_only mode in nvme_read_ana_log() is no longer used: remove it. 
Reviewed-by: Sagi Grimberg Signed-off-by: Anton Eidelman Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/multipath.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index d320684d25b2..fc99a40c1ec4 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -522,14 +522,13 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl, return 0; } -static int nvme_read_ana_log(struct nvme_ctrl *ctrl, bool groups_only) +static int nvme_read_ana_log(struct nvme_ctrl *ctrl) { u32 nr_change_groups = 0; int error; mutex_lock(&ctrl->ana_lock); - error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA, - groups_only ? NVME_ANA_LOG_RGO : 0, + error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA, 0, ctrl->ana_log_buf, ctrl->ana_log_size, 0); if (error) { dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error); @@ -565,7 +564,7 @@ static void nvme_ana_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ana_work); - nvme_read_ana_log(ctrl, false); + nvme_read_ana_log(ctrl); } static void nvme_anatt_timeout(struct timer_list *t) @@ -715,7 +714,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) goto out; } - error = nvme_read_ana_log(ctrl, false); + error = nvme_read_ana_log(ctrl); if (error) goto out_free_ana_log_buf; return 0; -- cgit v1.2.3 From d848074b2f1eb11a38691285f7366bce83087014 Mon Sep 17 00:00:00 2001 From: Anton Ivanov Date: Tue, 29 Oct 2019 09:13:34 +0000 Subject: um-ubd: Entrust re-queue to the upper layers Fixes crashes due to ubd requeue logic conflicting with the block-mq logic. Crash is reproducible in 5.0 - 5.3. 
Fixes: 53766defb8c8 ("um: Clean-up command processing in UML UBD driver") Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Anton Ivanov Signed-off-by: Jens Axboe --- arch/um/drivers/ubd_kern.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 612535cd9706..6627d7c30f37 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1403,8 +1403,12 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, spin_unlock_irq(&ubd_dev->lock); - if (ret < 0) - blk_mq_requeue_request(req, true); + if (ret < 0) { + if (ret == -ENOMEM) + res = BLK_STS_RESOURCE; + else + res = BLK_STS_DEV_RESOURCE; + } return res; } -- cgit v1.2.3 From aa57157be69fb599bd4c38a4b75c5aad74a60ec0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 29 Oct 2019 15:30:51 +0000 Subject: arm64: Ensure VM_WRITE|VM_SHARED ptes are clean by default Shared and writable mappings (__S.1.) should be clean (!dirty) initially and made dirty on a subsequent write either through the hardware DBM (dirty bit management) mechanism or through a write page fault. A clean pte for the arm64 kernel is one that has PTE_RDONLY set and PTE_DIRTY clear. The PAGE_SHARED{,_EXEC} attributes have PTE_WRITE set (PTE_DBM) and PTE_DIRTY clear. Prior to commit 73e86cb03cf2 ("arm64: Move PTE_RDONLY bit handling out of set_pte_at()"), it was the responsibility of set_pte_at() to set the PTE_RDONLY bit and mark the pte clean if the software PTE_DIRTY bit was not set. However, the above commit removed the pte_sw_dirty() check and the subsequent setting of PTE_RDONLY in set_pte_at() while leaving the PAGE_SHARED{,_EXEC} definitions unchanged. The result is that shared+writable mappings are now dirty by default. Fix the above by explicitly setting PTE_RDONLY in PAGE_SHARED{,_EXEC}. In addition, remove the superfluous PTE_DIRTY bit from the kernel PROT_* attributes.
Fixes: 73e86cb03cf2 ("arm64: Move PTE_RDONLY bit handling out of set_pte_at()") Cc: # 4.14.x- Cc: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-prot.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 9a21b84536f2..8dc6c5cdabe6 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -32,11 +32,11 @@ #define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) #define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) -#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) -#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) -#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) -#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) +#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) @@ -80,8 +80,9 @@ #define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | 
PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN) #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) -#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +/* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ +#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) #define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) -- cgit v1.2.3 From ca8cb69580236f47041dd045c08f82cb7bb50d7c Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 16 Oct 2019 15:37:06 +0200 Subject: drm/etnaviv: fix deadlock in GPU coredump The GPU coredump function violates the locking order by holding the MMU context lock while trying to acquire the etnaviv_gem_object lock. This results in a possible ABBA deadlock with other codepaths which follow the established locking order. Fortunately this is easy to fix by dropping the MMU context lock earlier, as the BO dumping doesn't need the MMU context to be stable. The only thing the BO dumping cares about are the BO mappings, which are stable across the lifetime of the job. Fixes: 27b67278e007 (drm/etnaviv: rework MMU handling) [ Not really the first bad commit, but the one where this fix applies cleanly. Stable kernels need a manual backport. 
] Reported-by: Christian Gmeiner Signed-off-by: Lucas Stach Tested-by: Christian Gmeiner --- drivers/gpu/drm/etnaviv/etnaviv_dump.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c index 698db540972c..648cf0207309 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c @@ -180,6 +180,8 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit) etnaviv_cmdbuf_get_va(&submit->cmdbuf, &gpu->mmu_context->cmdbuf_mapping)); + mutex_unlock(&gpu->mmu_context->lock); + /* Reserve space for the bomap */ if (n_bomap_pages) { bomap_start = bomap = iter.data; @@ -221,8 +223,6 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit) obj->base.size); } - mutex_unlock(&gpu->mmu_context->lock); - etnaviv_core_dump_header(&iter, ETDUMP_BUF_END, iter.data); dev_coredumpv(gpu->dev, iter.start, iter.data - iter.start, GFP_KERNEL); -- cgit v1.2.3 From 18fa692d8020083cd57ce031a4b5a7a4ec8bc50a Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 16 Oct 2019 16:10:21 +0200 Subject: drm/etnaviv: reinstate MMUv1 command buffer window check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The switch to per-process address spaces erroneously dropped the check which validated that the command buffer is mapped through the linear aperture as required by the hardware. This turned a system misconfiguration with a helpful error message into a very hard to debug issue. Reinstate the check at the appropriate location.
Fixes: 17e4660ae3d7 (drm/etnaviv: implement per-process address spaces on MMUv2) Signed-off-by: Lucas Stach Reviewed-by: Guido Günther --- drivers/gpu/drm/etnaviv/etnaviv_mmu.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index 35ebae6a1be7..3607d348c298 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -328,12 +328,23 @@ etnaviv_iommu_context_init(struct etnaviv_iommu_global *global, ret = etnaviv_cmdbuf_suballoc_map(suballoc, ctx, &ctx->cmdbuf_mapping, global->memory_base); - if (ret) { - global->ops->free(ctx); - return NULL; + if (ret) + goto out_free; + + if (global->version == ETNAVIV_IOMMU_V1 && + ctx->cmdbuf_mapping.iova > 0x80000000) { + dev_err(global->dev, + "command buffer outside valid memory window\n"); + goto out_unmap; } return ctx; + +out_unmap: + etnaviv_cmdbuf_suballoc_unmap(ctx, &ctx->cmdbuf_mapping); +out_free: + global->ops->free(ctx); + return NULL; } void etnaviv_iommu_restore(struct etnaviv_gpu *gpu, -- cgit v1.2.3 From a2f10d4a3069fee666dab20fab5458757ba1f22d Mon Sep 17 00:00:00 2001 From: Christian Gmeiner Date: Fri, 25 Oct 2019 12:39:10 +0200 Subject: drm/etnaviv: fix dumping of iommuv2 etnaviv_iommuv2_dump_size(..) returns the number of PTE * SZ_4K but etnaviv_iommuv2_dump(..) increments buf pointer even if there is no PTE. This results in a bad buf pointer which gets used for memcpy(..), when copying the MMU state in the coredump buffer. 
Fixes: afb7b3b1deb4 ("drm/etnaviv: implement IOMMUv2 translation") Cc: stable@vger.kernel.org Signed-off-by: Christian Gmeiner Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c index 043111a1d60c..f8bf488e9d71 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c @@ -155,9 +155,11 @@ static void etnaviv_iommuv2_dump(struct etnaviv_iommu_context *context, void *bu memcpy(buf, v2_context->mtlb_cpu, SZ_4K); buf += SZ_4K; - for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++, buf += SZ_4K) - if (v2_context->mtlb_cpu[i] & MMUv2_PTE_PRESENT) + for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) + if (v2_context->mtlb_cpu[i] & MMUv2_PTE_PRESENT) { memcpy(buf, v2_context->stlb_cpu[i], SZ_4K); + buf += SZ_4K; + } } static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu, -- cgit v1.2.3 From d4af3c4b81f4cd5662baa6f1492f998d89783318 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 29 Oct 2019 10:15:39 -0700 Subject: arm64: cpufeature: Enable Qualcomm Falkor/Kryo errata 1003 With the introduction of 'cce360b54ce6 ("arm64: capabilities: Filter the entries based on a given mask")' the Qualcomm Falkor/Kryo errata 1003 is no longer applied. The result of not applying errata 1003 is that MSM8996 runs into various RCU stalls and fails to boot most of the time. Give 1003 a "type" to ensure they are not filtered out in update_cpu_capabilities().
Fixes: cce360b54ce6 ("arm64: capabilities: Filter the entries based on a given mask") Cc: stable@vger.kernel.org Reported-by: Mark Brown Suggested-by: Will Deacon Signed-off-by: Bjorn Andersson Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 6c3b10a41bd8..7f9b699969c7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -816,6 +816,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Qualcomm Technologies Falkor/Kryo erratum 1003", .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = cpucap_multi_entry_cap_matches, .match_list = qcom_erratum_1003_list, }, -- cgit v1.2.3 From 85ac30fa2e24f628e9f4f9344460f4015d33fd7d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 25 Oct 2019 12:06:02 +0100 Subject: fjes: Handle workqueue allocation failure In the highly unlikely event that we fail to allocate either of the "/txrx" or "/control" workqueues, we should bail cleanly rather than blindly march on with NULL queue pointer(s) installed in the 'fjes_adapter' instance. Cc: "David S. Miller" Reported-by: Nicolas Waisman Link: https://lore.kernel.org/lkml/CADJ_3a8WFrs5NouXNqS5WYe7rebFP+_A5CheeqAyD_p7DFJJcg@mail.gmail.com/ Signed-off-by: Will Deacon Signed-off-by: David S. 
Miller --- drivers/net/fjes/fjes_main.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index bbbc1dcb6ab5..b517c1af9de0 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -1237,8 +1237,17 @@ static int fjes_probe(struct platform_device *plat_dev) adapter->open_guard = false; adapter->txrx_wq = alloc_workqueue(DRV_NAME "/txrx", WQ_MEM_RECLAIM, 0); + if (unlikely(!adapter->txrx_wq)) { + err = -ENOMEM; + goto err_free_netdev; + } + adapter->control_wq = alloc_workqueue(DRV_NAME "/control", WQ_MEM_RECLAIM, 0); + if (unlikely(!adapter->control_wq)) { + err = -ENOMEM; + goto err_free_txrx_wq; + } INIT_WORK(&adapter->tx_stall_task, fjes_tx_stall_task); INIT_WORK(&adapter->raise_intr_rxdata_task, @@ -1255,7 +1264,7 @@ static int fjes_probe(struct platform_device *plat_dev) hw->hw_res.irq = platform_get_irq(plat_dev, 0); err = fjes_hw_init(&adapter->hw); if (err) - goto err_free_netdev; + goto err_free_control_wq; /* setup MAC address (02:00:00:00:00:[epid])*/ netdev->dev_addr[0] = 2; @@ -1277,6 +1286,10 @@ static int fjes_probe(struct platform_device *plat_dev) err_hw_exit: fjes_hw_exit(&adapter->hw); +err_free_control_wq: + destroy_workqueue(adapter->control_wq); +err_free_txrx_wq: + destroy_workqueue(adapter->txrx_wq); err_free_netdev: free_netdev(netdev); err_out: -- cgit v1.2.3 From 63a41746827cb16dc6ad0d4d761ab4e7dda7a0c3 Mon Sep 17 00:00:00 2001 From: Jiangfeng Xiao Date: Fri, 25 Oct 2019 21:48:22 +0800 Subject: net: hisilicon: Fix "Trying to free already-free IRQ" When rmmod hip04_eth.ko, we can get the following warning: Task track: rmmod(1623)>bash(1591)>login(1581)>init(1) ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1623 at kernel/irq/manage.c:1557 __free_irq+0xa4/0x2ac() Trying to free already-free IRQ 200 Modules linked in: ping(O) pramdisk(O) cpuinfo(O) rtos_snapshot(O) interrupt_ctrl(O) mtdblock mtd_blkdevrtfs nfs_acl nfs 
lockd grace sunrpc xt_tcpudp ipt_REJECT iptable_filter ip_tables x_tables nf_reject_ipv CPU: 0 PID: 1623 Comm: rmmod Tainted: G O 4.4.193 #1 Hardware name: Hisilicon A15 [] (rtos_unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xa0/0xd8) [] (dump_stack) from [] (warn_slowpath_common+0x84/0xb0) [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x3c/0x68) [] (warn_slowpath_fmt) from [] (__free_irq+0xa4/0x2ac) [] (__free_irq) from [] (free_irq+0x60/0x7c) [] (free_irq) from [] (release_nodes+0x1c4/0x1ec) [] (release_nodes) from [] (__device_release_driver+0xa8/0x104) [] (__device_release_driver) from [] (driver_detach+0xd0/0xf8) [] (driver_detach) from [] (bus_remove_driver+0x64/0x8c) [] (bus_remove_driver) from [] (SyS_delete_module+0x198/0x1e0) [] (SyS_delete_module) from [] (__sys_trace_return+0x0/0x10) ---[ end trace bb25d6123d849b44 ]--- Currently "rmmod hip04_eth.ko" call free_irq more than once as devres_release_all and hip04_remove both call free_irq. This results in a 'Trying to free already-free IRQ' warning. To solve the problem free_irq has been moved out of hip04_remove. Signed-off-by: Jiangfeng Xiao Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hip04_eth.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index c84167447abe..ad6d91219daf 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -1038,7 +1038,6 @@ static int hip04_remove(struct platform_device *pdev) hip04_free_ring(ndev, d); unregister_netdev(ndev); - free_irq(ndev->irq, ndev); of_node_put(priv->phy_node); cancel_work_sync(&priv->tx_timeout_task); free_netdev(ndev); -- cgit v1.2.3 From 6f39188c9d5f81af7a3bc687636b7abc9629ee27 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 25 Oct 2019 09:30:15 +0800 Subject: drm/panfrost: fix -Wmissing-prototypes warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We get these warnings when build kernel W=1: drivers/gpu/drm/panfrost/panfrost_perfcnt.c:35:6: warning: no previous prototype for ‘panfrost_perfcnt_clean_cache_done’ [-Wmissing-prototypes] drivers/gpu/drm/panfrost/panfrost_perfcnt.c:40:6: warning: no previous prototype for ‘panfrost_perfcnt_sample_done’ [-Wmissing-prototypes] drivers/gpu/drm/panfrost/panfrost_perfcnt.c:190:5: warning: no previous prototype for ‘panfrost_ioctl_perfcnt_enable’ [-Wmissing-prototypes] drivers/gpu/drm/panfrost/panfrost_perfcnt.c:218:5: warning: no previous prototype for ‘panfrost_ioctl_perfcnt_dump’ [-Wmissing-prototypes] drivers/gpu/drm/panfrost/panfrost_perfcnt.c:250:6: warning: no previous prototype for ‘panfrost_perfcnt_close’ [-Wmissing-prototypes] drivers/gpu/drm/panfrost/panfrost_perfcnt.c:264:5: warning: no previous prototype for ‘panfrost_perfcnt_init’ [-Wmissing-prototypes] drivers/gpu/drm/panfrost/panfrost_perfcnt.c:320:6: warning: no previous prototype for ‘panfrost_perfcnt_fini’ [-Wmissing-prototypes] drivers/gpu/drm/panfrost/panfrost_mmu.c:227:6: warning: no previous prototype for ‘panfrost_mmu_flush_range’ [-Wmissing-prototypes] 
drivers/gpu/drm/panfrost/panfrost_mmu.c:435:5: warning: no previous prototype for ‘panfrost_mmu_map_fault_addr’ [-Wmissing-prototypes] For file panfrost_mmu.c, make functions static to fix this. For file panfrost_perfcnt.c, include header file can fix this. Signed-off-by: Yi Wang Reviewed-by: Steven Price Cc: stable@vger.kernel.org [robh: fixup function parameter alignment] Signed-off-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/1571967015-42854-1-git-send-email-wang.yi59@zte.com.cn --- drivers/gpu/drm/panfrost/panfrost_mmu.c | 9 +++++---- drivers/gpu/drm/panfrost/panfrost_perfcnt.c | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index bdd990568476..87e7963b8adf 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -224,9 +224,9 @@ static size_t get_pgsize(u64 addr, size_t size) return SZ_2M; } -void panfrost_mmu_flush_range(struct panfrost_device *pfdev, - struct panfrost_mmu *mmu, - u64 iova, size_t size) +static void panfrost_mmu_flush_range(struct panfrost_device *pfdev, + struct panfrost_mmu *mmu, + u64 iova, size_t size) { if (mmu->as < 0) return; @@ -432,7 +432,8 @@ out: #define NUM_FAULT_PAGES (SZ_2M / PAGE_SIZE) -int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr) +static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, + u64 addr) { int ret, i; struct panfrost_gem_object *bo; diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c index 83c57d325ca8..2dba192bf198 100644 --- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c +++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c @@ -16,6 +16,7 @@ #include "panfrost_issues.h" #include "panfrost_job.h" #include "panfrost_mmu.h" +#include "panfrost_perfcnt.h" #include "panfrost_regs.h" #define COUNTERS_PER_BLOCK 64 -- cgit v1.2.3 From 
f70744c68779c8a72a0c82294e3233b994af656d Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 28 Oct 2019 20:08:25 +0000 Subject: drm/panfrost: Don't dereference bogus MMU pointers It seems that killing an application while faults are occurring (particularly with a GPU in FPGA at a whopping 40MHz) can lead to handling a lingering page fault after all the address space contexts have already been freed. In this situation, the LRU list is empty so addr_to_drm_mm_node() ends up dereferencing the list head as if it were a struct panfrost_mmu entry; this leaves "mmu->as" actually pointing at the pfdev->alloc_mask bitmap, which is also empty, and given that the fault has a high likelihood of being in AS0, hilarity ensues. Sadly, the cleanest solution seems to involve another goto. Oh well, at least it's robust... Fixes: 65e51e30d862 ("drm/panfrost: Prevent race when handling page fault") Signed-off-by: Robin Murphy Signed-off-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/9a0b09e6b5851f0d4428b72dd6b8b4c0d0ef4206.1572293305.git.robin.murphy@arm.com --- drivers/gpu/drm/panfrost/panfrost_mmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 87e7963b8adf..a3ed64a1f15e 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -406,11 +406,11 @@ addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr) spin_lock(&pfdev->as_lock); list_for_each_entry(mmu, &pfdev->as_lru_list, list) { if (as == mmu->as) - break; + goto found_mmu; } - if (as != mmu->as) - goto out; + goto out; +found_mmu: priv = container_of(mmu, struct panfrost_file_priv, mmu); spin_lock(&priv->mm_lock); -- cgit v1.2.3 From ea60ed6fcf29eebc78f2ce91491e6309ee005a01 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 25 Oct 2019 14:05:24 +0100 Subject: ceph: fix use-after-free in __ceph_remove_cap() KASAN reports a 
use-after-free when running xfstest generic/531, with the following trace: [ 293.903362] kasan_report+0xe/0x20 [ 293.903365] rb_erase+0x1f/0x790 [ 293.903370] __ceph_remove_cap+0x201/0x370 [ 293.903375] __ceph_remove_caps+0x4b/0x70 [ 293.903380] ceph_evict_inode+0x4e/0x360 [ 293.903386] evict+0x169/0x290 [ 293.903390] __dentry_kill+0x16f/0x250 [ 293.903394] dput+0x1c6/0x440 [ 293.903398] __fput+0x184/0x330 [ 293.903404] task_work_run+0xb9/0xe0 [ 293.903410] exit_to_usermode_loop+0xd3/0xe0 [ 293.903413] do_syscall_64+0x1a0/0x1c0 [ 293.903417] entry_SYSCALL_64_after_hwframe+0x44/0xa9 This happens because __ceph_remove_cap() may queue a cap release (__ceph_queue_cap_release) which can be scheduled before that cap is removed from the inode list with rb_erase(&cap->ci_node, &ci->i_caps); And, when this finally happens, the use-after-free will occur. This can be fixed by removing the cap from the inode list before being removed from the session list, and thus eliminating the risk of an UAF. Cc: stable@vger.kernel.org Signed-off-by: Luis Henriques Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index d3b9c9d5c1bd..f5a38910a82b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1058,6 +1058,11 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); + /* remove from inode's cap rbtree, and clear auth cap */ + rb_erase(&cap->ci_node, &ci->i_caps); + if (ci->i_auth_cap == cap) + ci->i_auth_cap = NULL; + /* remove from session list */ spin_lock(&session->s_cap_lock); if (session->s_cap_iterator == cap) { @@ -1091,11 +1096,6 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) spin_unlock(&session->s_cap_lock); - /* remove from inode list */ - rb_erase(&cap->ci_node, &ci->i_caps); - if (ci->i_auth_cap == cap) - ci->i_auth_cap = NULL; - if (removed) 
ceph_put_cap(mdsc, cap); -- cgit v1.2.3 From aa8dd816732b2bab28c54bc4d2ccf3fc8a6e0892 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 29 Oct 2019 13:50:19 +0000 Subject: ceph: fix RCU case handling in ceph_d_revalidate() For RCU case ->d_revalidate() is called with rcu_read_lock() and without pinning the dentry passed to it. Which means that it can't rely upon ->d_inode remaining stable; that's the reason for d_inode_rcu(), actually. Make sure we don't reload ->d_inode there. Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 4ca0b8ff9a72..d17a789fd856 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1553,36 +1553,37 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) { int valid = 0; struct dentry *parent; - struct inode *dir; + struct inode *dir, *inode; if (flags & LOOKUP_RCU) { parent = READ_ONCE(dentry->d_parent); dir = d_inode_rcu(parent); if (!dir) return -ECHILD; + inode = d_inode_rcu(dentry); } else { parent = dget_parent(dentry); dir = d_inode(parent); + inode = d_inode(dentry); } dout("d_revalidate %p '%pd' inode %p offset %lld\n", dentry, - dentry, d_inode(dentry), ceph_dentry(dentry)->offset); + dentry, inode, ceph_dentry(dentry)->offset); /* always trust cached snapped dentries, snapdir dentry */ if (ceph_snap(dir) != CEPH_NOSNAP) { dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry, - dentry, d_inode(dentry)); + dentry, inode); valid = 1; - } else if (d_really_is_positive(dentry) && - ceph_snap(d_inode(dentry)) == CEPH_SNAPDIR) { + } else if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { valid = 1; } else { valid = dentry_lease_is_valid(dentry, flags); if (valid == -ECHILD) return valid; if (valid || dir_lease_is_valid(dir, dentry)) { - if (d_really_is_positive(dentry)) - valid = ceph_is_any_caps(d_inode(dentry)); + if (inode) + 
valid = ceph_is_any_caps(inode); else valid = 1; } -- cgit v1.2.3 From 1f08529c84cfecaf1261ed9b7e17fab18541c58f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 29 Oct 2019 13:53:29 +0000 Subject: ceph: add missing check in d_revalidate snapdir handling We should not play with dcache without parent locked... Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 9f135624ae47..c07407586ce8 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1434,6 +1434,7 @@ retry_lookup: dout(" final dn %p\n", dn); } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || req->r_op == CEPH_MDS_OP_MKSNAP) && + test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { struct inode *dir = req->r_parent; -- cgit v1.2.3 From dd7ebe6787328dec8b45f97c93d3dfdc4a4e4fde Mon Sep 17 00:00:00 2001 From: Anna Karas Date: Thu, 26 Sep 2019 15:35:59 +0300 Subject: drm/i915/tgl: Fix doc not corresponding to code Replace PLLs names used in documentation to that used in the code. 
Cc: Vandita Kulkarni Fixes: 68ff39c3f8c0 ("drm/i915/tgl: Add new pll ids") Signed-off-by: Anna Karas Reviewed-by: Vandita Kulkarni Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190926123559.15717-1-anna.karas@intel.com (cherry picked from commit d328bd4f905834c7d87a49962ebc96e397aab7b9) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dpll_mgr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h index e7588799fce5..104cf6d42333 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h @@ -147,11 +147,11 @@ enum intel_dpll_id { */ DPLL_ID_ICL_MGPLL4 = 6, /** - * @DPLL_ID_TGL_TCPLL5: TGL TC PLL port 5 (TC5) + * @DPLL_ID_TGL_MGPLL5: TGL TC PLL port 5 (TC5) */ DPLL_ID_TGL_MGPLL5 = 7, /** - * @DPLL_ID_TGL_TCPLL6: TGL TC PLL port 6 (TC6) + * @DPLL_ID_TGL_MGPLL6: TGL TC PLL port 6 (TC6) */ DPLL_ID_TGL_MGPLL6 = 8, }; -- cgit v1.2.3 From 6f3ef5c25cc762687a7341c18cbea5af54461407 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Fri, 25 Oct 2019 23:53:30 -0500 Subject: wimax: i2400: Fix memory leak in i2400m_op_rfkill_sw_toggle In the implementation of i2400m_op_rfkill_sw_toggle() the allocated buffer for cmd should be released before returning. The documentation for i2400m_msg_to_dev() says when it returns the buffer can be reused. Meaning cmd should be released in either case. Move kfree(cmd) before return to be reached by all execution paths. Fixes: 2507e6ab7a9a ("wimax: i2400: fix memory leak") Signed-off-by: Navid Emamdoost Signed-off-by: David S. 
Miller --- drivers/net/wimax/i2400m/op-rfkill.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wimax/i2400m/op-rfkill.c b/drivers/net/wimax/i2400m/op-rfkill.c index 8efb493ceec2..5c79f052cad2 100644 --- a/drivers/net/wimax/i2400m/op-rfkill.c +++ b/drivers/net/wimax/i2400m/op-rfkill.c @@ -127,12 +127,12 @@ int i2400m_op_rfkill_sw_toggle(struct wimax_dev *wimax_dev, "%d\n", result); result = 0; error_cmd: - kfree(cmd); kfree_skb(ack_skb); error_msg_to_dev: error_alloc: d_fnend(4, dev, "(wimax_dev %p state %d) = %d\n", wimax_dev, state, result); + kfree(cmd); return result; } -- cgit v1.2.3 From 1c44ce560b4de639f237b458be1729489ff44d0a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 26 Oct 2019 21:04:26 +0300 Subject: net: mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is up Background information: the driver operates the hardware in a mode where a single VLAN can be transmitted as untagged on a particular egress port. That is the "native VLAN on trunk port" use case. Its value is held in port->vid. Consider the following command sequence (no network manager, all interfaces are down, debugging prints added by me): $ ip link add dev br0 type bridge vlan_filtering 1 $ ip link set dev swp0 master br0 Kernel code path during last command: br_add_slave -> ocelot_netdevice_port_event (NETDEV_CHANGEUPPER): [ 21.401901] ocelot_vlan_port_apply: port 0 vlan aware 0 pvid 0 vid 0 br_add_slave -> nbp_vlan_init -> switchdev_port_attr_set -> ocelot_port_attr_set (SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING): [ 21.413335] ocelot_vlan_port_apply: port 0 vlan aware 1 pvid 0 vid 0 br_add_slave -> nbp_vlan_init -> nbp_vlan_add -> br_switchdev_port_vlan_add -> switchdev_port_obj_add -> ocelot_port_obj_add -> ocelot_vlan_vid_add [ 21.667421] ocelot_vlan_port_apply: port 0 vlan aware 1 pvid 1 vid 1 So far so good. The bridge has replaced the driver's default pvid used in standalone mode (0) with its own default_pvid (1). 
The port's vid (native VLAN) has also changed from 0 to 1. $ ip link set dev swp0 up [ 31.722956] 8021q: adding VLAN 0 to HW filter on device swp0 do_setlink -> dev_change_flags -> vlan_vid_add -> ocelot_vlan_rx_add_vid -> ocelot_vlan_vid_add: [ 31.728700] ocelot_vlan_port_apply: port 0 vlan aware 1 pvid 1 vid 0 The 8021q module uses the .ndo_vlan_rx_add_vid API on .ndo_open to make ports be able to transmit and receive 802.1p-tagged traffic by default. This API is supposed to offload a VLAN sub-interface, which for a switch port means to add a VLAN that is not a pvid, and tagged on egress. But the driver implementation of .ndo_vlan_rx_add_vid is wrong: it adds back vid 0 as "egress untagged". Now back to the initial paragraph: there is a single untagged VID that the driver keeps track of, and that has just changed from 1 (the pvid) to 0. So this breaks the bridge core's expectation, because it has changed vid 1 from untagged to tagged, when what the user sees is. $ bridge vlan port vlan ids swp0 1 PVID Egress Untagged br0 1 PVID Egress Untagged But curiously, instead of manifesting itself as "untagged and pvid-tagged traffic gets sent as tagged on egress", the bug: - is hidden when vlan_filtering=0 - manifests as dropped traffic when vlan_filtering=1, due to this setting: if (port->vlan_aware && !port->vid) /* If port is vlan-aware and tagged, drop untagged and priority * tagged frames. */ val |= ANA_PORT_DROP_CFG_DROP_UNTAGGED_ENA | ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA; which would have made sense if it weren't for this bug. The setting's intention was "this is a trunk port with no native VLAN, so don't accept untagged traffic". So the driver was never expecting to set VLAN 0 as the value of the native VLAN, 0 was just encoding for "invalid". 
So the fix is to not send 802.1p traffic as untagged, because that would change the port's native vlan to 0, unbeknownst to the bridge, and trigger unexpected code paths in the driver. Cc: Antoine Tenart Cc: Alexandre Belloni Fixes: 7142529f1688 ("net: mscc: ocelot: add VLAN filtering") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Acked-by: Alexandre Belloni Reviewed-by: Horatiu Vultur Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 4d1bce4389c7..a891bccb3a41 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -934,7 +934,7 @@ end: static int ocelot_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { - return ocelot_vlan_vid_add(dev, vid, false, true); + return ocelot_vlan_vid_add(dev, vid, false, false); } static int ocelot_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, -- cgit v1.2.3 From b9cd75e6689560140dadaed98eb4b41aad75d55d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 26 Oct 2019 21:04:27 +0300 Subject: net: mscc: ocelot: refuse to overwrite the port's native vlan The switch driver keeps a "vid" variable per port, which signifies _the_ VLAN ID that is stripped on that port's egress (aka the native VLAN on a trunk port). That is the way the hardware is designed (mostly). The port->vid is programmed into REW:PORT:PORT_VLAN_CFG:PORT_VID and the rewriter is told to send all traffic as tagged except the one having port->vid. There exists a possibility of finer-grained egress untagging decisions: using the VCAP IS1 engine, one rule can be added to match every VLAN-tagged frame whose VLAN should be untagged, and set POP_CNT=1 as action. However, the IS1 can hold at most 512 entries, and the VLANs are in the order of 6 * 4096. So the code is fine for now. 
But this sequence of commands: $ bridge vlan add dev swp0 vid 1 pvid untagged $ bridge vlan add dev swp0 vid 2 untagged makes untagged and pvid-tagged traffic be sent out of swp0 as tagged with VID 1, despite user's request. Prevent that from happening. The user should temporarily remove the existing untagged VLAN (1 in this case), add it back as tagged, and then add the new untagged VLAN (2 in this case). Cc: Antoine Tenart Cc: Alexandre Belloni Fixes: 7142529f1688 ("net: mscc: ocelot: add VLAN filtering") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Acked-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index a891bccb3a41..344539c0d3aa 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -261,8 +261,15 @@ static int ocelot_vlan_vid_add(struct net_device *dev, u16 vid, bool pvid, port->pvid = vid; /* Untagged egress vlan clasification */ - if (untagged) + if (untagged && port->vid != vid) { + if (port->vid) { + dev_err(ocelot->dev, + "Port already has a native VLAN: %d\n", + port->vid); + return -EBUSY; + } port->vid = vid; + } ocelot_vlan_port_apply(ocelot, port); -- cgit v1.2.3 From 6dfef396ea13873ae9066ee2e0ad6ee364031fe2 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 11 Sep 2019 14:44:50 +0300 Subject: net/mlx5: Fix flow counter list auto bits struct The union should contain the extended dest and counter list. Remove the reserved 0x40 bits, which are redundant. This change doesn't break any functionality. Everything works today because the code in fs_cmd.c is using the correct structs for either the extended dest or the basic dest.
Fixes: 1b115498598f ("net/mlx5: Introduce extended destination fields") Signed-off-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 138c50d5a353..0836fe232f97 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1545,9 +1545,8 @@ struct mlx5_ifc_extended_dest_format_bits { }; union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits { - struct mlx5_ifc_dest_format_struct_bits dest_format_struct; + struct mlx5_ifc_extended_dest_format_bits extended_dest_format; struct mlx5_ifc_flow_counter_list_bits flow_counter_list; - u8 reserved_at_0[0x40]; }; struct mlx5_ifc_fte_match_param_bits { -- cgit v1.2.3 From d5dbcc4e87bc8444bd2f1ca4b8f787e1e5677ec2 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 4 Sep 2019 12:32:49 +0000 Subject: net/mlx5e: Determine source port properly for vlan push action Termination tables are used for vlan push actions on uplink ports. To support RoCE dual port the source port value was placed in a register. Fix the code to use an API method returning the source port according to the FW capabilities. 
Fixes: 10caabdaad5a ("net/mlx5e: Use termination table for VLAN push actions") Signed-off-by: Dmytro Linkin Reviewed-by: Jianbo Liu Reviewed-by: Oz Shlomo Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads_termtbl.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 1d55a324a17e..7879e1746297 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -177,22 +177,32 @@ mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, memset(&src->vlan[1], 0, sizeof(src->vlan[1])); } +static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, + const struct mlx5_flow_spec *spec) +{ + u32 port_mask, port_value; + + if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) + return spec->flow_context.flow_source == MLX5_VPORT_UPLINK; + + port_mask = MLX5_GET(fte_match_param, spec->match_criteria, + misc_parameters.source_port); + port_value = MLX5_GET(fte_match_param, spec->match_value, + misc_parameters.source_port); + return (port_mask & port_value & 0xffff) == MLX5_VPORT_UPLINK; +} + bool mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, struct mlx5_flow_act *flow_act, struct mlx5_flow_spec *spec) { - u32 port_mask = MLX5_GET(fte_match_param, spec->match_criteria, - misc_parameters.source_port); - u32 port_value = MLX5_GET(fte_match_param, spec->match_value, - misc_parameters.source_port); - if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table)) return false; /* push vlan on RX */ return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) && - ((port_mask & port_value) == MLX5_VPORT_UPLINK); + mlx5_eswitch_offload_is_uplink_port(esw, spec); } struct mlx5_flow_handle * -- cgit v1.2.3 From 
752d3dc06d6936d5a357a18b6b51d91c7e134e88 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Thu, 29 Aug 2019 15:24:27 +0000 Subject: net/mlx5e: Remove incorrect match criteria assignment line The driver has a function which enables match criteria for misc parameters depending on eswitch capabilities. Fixes: 4f5d1beadc10 ("Merge branch 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux") Signed-off-by: Dmytro Linkin Reviewed-by: Jianbo Liu Reviewed-by: Roi Dayan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 00d71db15f22..369499e88fe8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -285,7 +285,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, mlx5_eswitch_set_rule_source_port(esw, spec, attr); - spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; -- cgit v1.2.3 From 5dfb6335cbecbd59040275c8396c2d0af0bbd549 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 16 Sep 2019 13:17:33 +0300 Subject: net/mlx5e: Replace kfree with kvfree when free vhca stats Memory allocated by kvzalloc should be freed by kvfree.
Fixes: cef35af34d6d ("net/mlx5e: Add mlx5e HV VHCA stats agent") Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c index b3a249b2a482..ac44bbe95c5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c @@ -141,7 +141,7 @@ int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) "Failed to create hv vhca stats agent, err = %ld\n", PTR_ERR(agent)); - kfree(priv->stats_agent.buf); + kvfree(priv->stats_agent.buf); return IS_ERR_OR_NULL(agent); } @@ -157,5 +157,5 @@ void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) return; mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent); - kfree(priv->stats_agent.buf); + kvfree(priv->stats_agent.buf); } -- cgit v1.2.3 From 64d7b68577130ae00f954a28ea9d6bc51025caf9 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 24 Sep 2019 10:19:16 +0300 Subject: net/mlx5e: Only skip encap flows update when encap init failed When encap entry initialization completes successfully e->compl_result is set to positive value and not zero, like mlx5e_rep_update_flows() assumes at the moment. Fix the conditional to only skip encap flows update when e->compl_result < 0. 
Fixes: 2a1f1768fa17 ("net/mlx5e: Refactor neigh update for concurrent execution") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 95892a3b63a1..cd9bb7c7b341 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -611,8 +611,8 @@ static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, mutex_lock(&esw->offloads.encap_tbl_lock); encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); - if (e->compl_result || (encap_connected == neigh_connected && - ether_addr_equal(e->h_dest, ha))) + if (e->compl_result < 0 || (encap_connected == neigh_connected && + ether_addr_equal(e->h_dest, ha))) goto unlock; mlx5e_take_all_encap_flows(e, &flow_list); -- cgit v1.2.3 From 2347cee83b2bd868bde2d283db0fac89f22be4e0 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 19 Sep 2019 15:58:14 -0500 Subject: net/mlx5: Fix rtable reference leak If the rt entry gateway family is not AF_INET for multipath device, rtable reference is leaked. Hence, fix it by releasing the reference. 
Fixes: 5fb091e8130b ("net/mlx5e: Use hint to resolve route when in HW multipath mode") Fixes: e32ee6c78efa ("net/mlx5e: Support tunnel encap over tagged Ethernet") Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index f8ee18b4da6f..13af72556987 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -97,15 +97,19 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, if (ret) return ret; - if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) + if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) { + ip_rt_put(rt); return -ENETUNREACH; + } #else return -EOPNOTSUPP; #endif ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev); - if (ret < 0) + if (ret < 0) { + ip_rt_put(rt); return ret; + } if (!(*out_ttl)) *out_ttl = ip4_dst_hoplimit(&rt->dst); @@ -149,8 +153,10 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, *out_ttl = ip6_dst_hoplimit(dst); ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev); - if (ret < 0) + if (ret < 0) { + dst_release(dst); return ret; + } #else return -EOPNOTSUPP; #endif -- cgit v1.2.3 From 0fd79b1e17bec8460039f6bdb57163a0442110d9 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Tue, 15 Oct 2019 12:44:18 +0000 Subject: net/mlx5: Fix NULL pointer dereference in extended destination The cited commit refactored the encap id into a struct pointed from the destination. Bug fix for the case there is no encap for one of the destinations. 
Fixes: 2b688ea5efde ("net/mlx5: Add flow steering actions to fs_cmd shim layer") Signed-off-by: Eli Britstein Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 579c306caa7b..3c816e81f8d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -507,7 +507,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id, dst->dest_attr.vport.vhca_id); - if (extended_dest) { + if (extended_dest && + dst->dest_attr.vport.pkt_reformat) { MLX5_SET(dest_format_struct, in_dests, packet_reformat, !!(dst->dest_attr.vport.flags & -- cgit v1.2.3 From 2a4b6526236791a1bb8092079ad87a1629e78db5 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 10 Sep 2019 14:38:17 +0300 Subject: net/mlx5e: Don't store direct pointer to action's tunnel info The Geneve implementation changed mlx5 tc to use a direct pointer to the tunnel_key action's internal struct ip_tunnel_info instance. However, this leads to a use-after-free error when the initial filter that caused creation of a new encap entry is deleted or when the tunnel_key action is manually overwritten through the action API.
Moreover, with recent TC offloads API unlocking change struct flow_action_entry->tunnel point to temporal copy of tunnel info that is deallocated after filter is offloaded to hardware which causes bug to reproduce every time new filter is attached to existing encap entry with following KASAN bug: [ 314.885555] ================================================================== [ 314.886641] BUG: KASAN: use-after-free in memcmp+0x2c/0x60 [ 314.886864] Read of size 1 at addr ffff88886c746280 by task tc/2682 [ 314.887179] CPU: 22 PID: 2682 Comm: tc Not tainted 5.3.0-rc7+ #703 [ 314.887188] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 314.887195] Call Trace: [ 314.887215] dump_stack+0x9a/0xf0 [ 314.887236] print_address_description+0x67/0x323 [ 314.887248] ? memcmp+0x2c/0x60 [ 314.887257] ? memcmp+0x2c/0x60 [ 314.887272] __kasan_report.cold+0x1a/0x3d [ 314.887474] ? __mlx5e_tc_del_fdb_peer_flow+0x100/0x1b0 [mlx5_core] [ 314.887484] ? memcmp+0x2c/0x60 [ 314.887509] kasan_report+0xe/0x12 [ 314.887521] memcmp+0x2c/0x60 [ 314.887662] mlx5e_tc_add_fdb_flow+0x51b/0xbe0 [mlx5_core] [ 314.887838] ? mlx5e_encap_take+0x110/0x110 [mlx5_core] [ 314.887902] ? lockdep_init_map+0x87/0x2c0 [ 314.887924] ? __init_waitqueue_head+0x4f/0x60 [ 314.888062] ? mlx5e_alloc_flow.isra.0+0x18c/0x1c0 [mlx5_core] [ 314.888207] __mlx5e_add_fdb_flow+0x2d7/0x440 [mlx5_core] [ 314.888359] ? mlx5e_tc_update_neigh_used_value+0x6f0/0x6f0 [mlx5_core] [ 314.888374] ? match_held_lock+0x2e/0x240 [ 314.888537] mlx5e_configure_flower+0x830/0x16a0 [mlx5_core] [ 314.888702] ? __mlx5e_add_fdb_flow+0x440/0x440 [mlx5_core] [ 314.888713] ? down_read+0x118/0x2c0 [ 314.888728] ? down_read_killable+0x300/0x300 [ 314.888882] ? mlx5e_rep_get_ethtool_stats+0x180/0x180 [mlx5_core] [ 314.888899] tc_setup_cb_add+0x127/0x270 [ 314.888937] fl_hw_replace_filter+0x2ac/0x380 [cls_flower] [ 314.888976] ? fl_hw_destroy_filter+0x1b0/0x1b0 [cls_flower] [ 314.888990] ? 
fl_change+0xbcf/0x27ef [cls_flower] [ 314.889030] ? fl_change+0xa57/0x27ef [cls_flower] [ 314.889069] fl_change+0x16bd/0x27ef [cls_flower] [ 314.889135] ? __rhashtable_insert_fast.constprop.0+0xa00/0xa00 [cls_flower] [ 314.889167] ? __radix_tree_lookup+0xa4/0x130 [ 314.889200] ? fl_get+0x169/0x240 [cls_flower] [ 314.889218] ? fl_walk+0x230/0x230 [cls_flower] [ 314.889249] tc_new_tfilter+0x5e1/0xd40 [ 314.889281] ? __rhashtable_insert_fast.constprop.0+0xa00/0xa00 [cls_flower] [ 314.889309] ? tc_del_tfilter+0xa30/0xa30 [ 314.889335] ? __lock_acquire+0x5b5/0x2460 [ 314.889378] ? find_held_lock+0x85/0xa0 [ 314.889442] ? tc_del_tfilter+0xa30/0xa30 [ 314.889465] rtnetlink_rcv_msg+0x4ab/0x5f0 [ 314.889488] ? rtnl_dellink+0x490/0x490 [ 314.889518] ? lockdep_hardirqs_on+0x260/0x260 [ 314.889538] ? netlink_deliver_tap+0xab/0x5a0 [ 314.889550] ? match_held_lock+0x1b/0x240 [ 314.889575] netlink_rcv_skb+0xd0/0x200 [ 314.889588] ? rtnl_dellink+0x490/0x490 [ 314.889605] ? netlink_ack+0x440/0x440 [ 314.889635] ? netlink_deliver_tap+0x161/0x5a0 [ 314.889648] ? lock_downgrade+0x360/0x360 [ 314.889657] ? lock_acquire+0xe5/0x210 [ 314.889686] netlink_unicast+0x296/0x350 [ 314.889707] ? netlink_attachskb+0x390/0x390 [ 314.889726] ? _copy_from_iter_full+0xe0/0x3a0 [ 314.889738] ? __virt_addr_valid+0xbb/0x130 [ 314.889771] netlink_sendmsg+0x394/0x600 [ 314.889800] ? netlink_unicast+0x350/0x350 [ 314.889817] ? move_addr_to_kernel.part.0+0x90/0x90 [ 314.889852] ? netlink_unicast+0x350/0x350 [ 314.889872] sock_sendmsg+0x96/0xa0 [ 314.889891] ___sys_sendmsg+0x482/0x520 [ 314.889919] ? copy_msghdr_from_user+0x250/0x250 [ 314.889930] ? __fput+0x1fa/0x390 [ 314.889941] ? task_work_run+0xb7/0xf0 [ 314.889957] ? exit_to_usermode_loop+0x117/0x120 [ 314.889972] ? entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 314.889982] ? do_syscall_64+0x74/0xe0 [ 314.889992] ? entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 314.890012] ? mark_lock+0xac/0x9a0 [ 314.890028] ? __lock_acquire+0x5b5/0x2460 [ 314.890053] ? 
mark_lock+0xac/0x9a0 [ 314.890083] ? __lock_acquire+0x5b5/0x2460 [ 314.890112] ? match_held_lock+0x1b/0x240 [ 314.890144] ? __fget_light+0xa1/0xf0 [ 314.890166] ? sockfd_lookup_light+0x91/0xb0 [ 314.890187] __sys_sendmsg+0xba/0x130 [ 314.890201] ? __sys_sendmsg_sock+0xb0/0xb0 [ 314.890225] ? __blkcg_punt_bio_submit+0xd0/0xd0 [ 314.890264] ? lockdep_hardirqs_off+0xbe/0x100 [ 314.890274] ? mark_held_locks+0x24/0x90 [ 314.890286] ? do_syscall_64+0x1e/0xe0 [ 314.890308] do_syscall_64+0x74/0xe0 [ 314.890325] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 314.890336] RIP: 0033:0x7f00ca33d7b8 [ 314.890348] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 65 8f 0c 00 8b 00 85 c0 75 17 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83 ec 28 89 5 4 [ 314.890356] RSP: 002b:00007ffea2983928 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 314.890369] RAX: ffffffffffffffda RBX: 000000005d777d5b RCX: 00007f00ca33d7b8 [ 314.890377] RDX: 0000000000000000 RSI: 00007ffea2983990 RDI: 0000000000000003 [ 314.890384] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000006 [ 314.890392] R10: 0000000000404eda R11: 0000000000000246 R12: 0000000000000001 [ 314.890400] R13: 000000000047f640 R14: 00007ffea2987b58 R15: 0000000000000021 [ 314.890529] Allocated by task 2687: [ 314.890684] save_stack+0x1b/0x80 [ 314.890694] __kasan_kmalloc.constprop.0+0xc2/0xd0 [ 314.890705] __kmalloc_track_caller+0x102/0x340 [ 314.890721] kmemdup+0x1d/0x40 [ 314.890730] tc_setup_flow_action+0x731/0x2c27 [ 314.890743] fl_hw_replace_filter+0x23b/0x380 [cls_flower] [ 314.890756] fl_change+0x16bd/0x27ef [cls_flower] [ 314.890765] tc_new_tfilter+0x5e1/0xd40 [ 314.890776] rtnetlink_rcv_msg+0x4ab/0x5f0 [ 314.890786] netlink_rcv_skb+0xd0/0x200 [ 314.890796] netlink_unicast+0x296/0x350 [ 314.890805] netlink_sendmsg+0x394/0x600 [ 314.890815] sock_sendmsg+0x96/0xa0 [ 314.890825] ___sys_sendmsg+0x482/0x520 [ 314.890834] __sys_sendmsg+0xba/0x130 [ 314.890844] 
do_syscall_64+0x74/0xe0 [ 314.890854] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 314.890937] Freed by task 2687: [ 314.891076] save_stack+0x1b/0x80 [ 314.891086] __kasan_slab_free+0x12c/0x170 [ 314.891095] kfree+0xeb/0x2f0 [ 314.891106] tc_cleanup_flow_action+0x69/0xa0 [ 314.891119] fl_hw_replace_filter+0x2c5/0x380 [cls_flower] [ 314.891132] fl_change+0x16bd/0x27ef [cls_flower] [ 314.891140] tc_new_tfilter+0x5e1/0xd40 [ 314.891151] rtnetlink_rcv_msg+0x4ab/0x5f0 [ 314.891161] netlink_rcv_skb+0xd0/0x200 [ 314.891170] netlink_unicast+0x296/0x350 [ 314.891180] netlink_sendmsg+0x394/0x600 [ 314.891190] sock_sendmsg+0x96/0xa0 [ 314.891200] ___sys_sendmsg+0x482/0x520 [ 314.891208] __sys_sendmsg+0xba/0x130 [ 314.891218] do_syscall_64+0x74/0xe0 [ 314.891228] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 314.891315] The buggy address belongs to the object at ffff88886c746280 which belongs to the cache kmalloc-96 of size 96 [ 314.891762] The buggy address is located 0 bytes inside of 96-byte region [ffff88886c746280, ffff88886c7462e0) [ 314.892196] The buggy address belongs to the page: [ 314.892387] page:ffffea0021b1d180 refcount:1 mapcount:0 mapping:ffff88835d00ef80 index:0x0 [ 314.892398] flags: 0x57ffffc0000200(slab) [ 314.892413] raw: 0057ffffc0000200 ffffea00219e0340 0000000800000008 ffff88835d00ef80 [ 314.892423] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000 [ 314.892430] page dumped because: kasan: bad access detected [ 314.892515] Memory state around the buggy address: [ 314.892707] ffff88886c746180: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc [ 314.892976] ffff88886c746200: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc [ 314.893251] >ffff88886c746280: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc [ 314.893522] ^ [ 314.893657] ffff88886c746300: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc [ 314.893924] ffff88886c746380: 00 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc [ 314.894189] 
================================================================== Fix the issue by duplicating tunnel info into per-encap copy that is deallocated with encap structure. Also, duplicate tunnel info in flow parse attribute to support cases when flow might be attached asynchronously. Fixes: 1f6da30697d0 ("net/mlx5e: Geneve, Keep tunnel info as pointer to the original struct") Signed-off-by: Vlad Buslov Reviewed-by: Yevgeny Kliteynik Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 34 ++++++++++++++++++++----- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index c4c59d2e676e..fda0b37075e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1278,8 +1278,10 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5_eswitch_del_vlan_action(esw, attr); for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) - if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) + if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) { mlx5e_detach_encap(priv, flow, out_index); + kfree(attr->parse_attr->tun_info[out_index]); + } kvfree(attr->parse_attr); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) @@ -1559,6 +1561,7 @@ static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entr mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); } + kfree(e->tun_info); kfree(e->encap_header); kfree_rcu(e, rcu); } @@ -2972,6 +2975,13 @@ mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, return NULL; } +static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info) +{ + size_t tun_size = sizeof(*tun_info) + tun_info->options_len; + + return kmemdup(tun_info, tun_size, GFP_KERNEL); +} + static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct 
net_device *mirred_dev, @@ -3028,13 +3038,15 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, refcount_set(&e->refcnt, 1); init_completion(&e->res_ready); + tun_info = dup_tun_info(tun_info); + if (!tun_info) { + err = -ENOMEM; + goto out_err_init; + } e->tun_info = tun_info; err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); - if (err) { - kfree(e); - e = NULL; - goto out_err; - } + if (err) + goto out_err_init; INIT_LIST_HEAD(&e->flows); hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); @@ -3075,6 +3087,12 @@ out_err: if (e) mlx5e_encap_put(priv, e); return err; + +out_err_init: + mutex_unlock(&esw->offloads.encap_tbl_lock); + kfree(tun_info); + kfree(e); + return err; } static int parse_tc_vlan_action(struct mlx5e_priv *priv, @@ -3295,7 +3313,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, } else if (encap) { parse_attr->mirred_ifindex[attr->out_count] = out_dev->ifindex; - parse_attr->tun_info[attr->out_count] = info; + parse_attr->tun_info[attr->out_count] = dup_tun_info(info); + if (!parse_attr->tun_info[attr->out_count]) + return -ENOMEM; encap = false; attr->dests[attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; -- cgit v1.2.3 From 9df86bdb6746d7fcfc2fda715f7a7c3d0ddb2654 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 16 Sep 2019 14:54:20 +0300 Subject: net/mlx5e: Fix handling of compressed CQEs in case of low NAPI budget When CQE compression is enabled, compressed CQEs use the following structure: a title is followed by one or many blocks, each containing 8 mini CQEs (except the last, which may contain fewer mini CQEs). Due to NAPI budget restriction, a complete structure is not always parsed in one NAPI run, and some blocks with mini CQEs may be deferred to the next NAPI poll call - we have the mlx5e_decompress_cqes_cont call in the beginning of mlx5e_poll_rx_cq. 
However, if the budget is extremely low, some blocks may be left even after that, but the code that follows the mlx5e_decompress_cqes_cont call doesn't check it and assumes that a new CQE begins, which may not be the case. In such cases, random memory corruptions occur. An extremely low NAPI budget of 8 is used when busy_poll or busy_read is active. This commit adds a check to make sure that the previous compressed CQE has been completely parsed after mlx5e_decompress_cqes_cont, otherwise it prevents a new CQE from being fetched in the middle of a compressed CQE. This commit fixes random crashes in __build_skb, __page_pool_put_page and other not-related-directly places, that used to happen when both CQE compression and busy_poll/busy_read were enabled. Fixes: 7219ab34f184 ("net/mlx5e: CQE compression") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index d6a547238de0..82cffb3a9964 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1386,8 +1386,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) return 0; - if (rq->cqd.left) + if (rq->cqd.left) { work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget); + if (rq->cqd.left || work_done >= budget) + goto out; + } cqe = mlx5_cqwq_get_cqe(cqwq); if (!cqe) { -- cgit v1.2.3 From 534e7366f41b0c689b01af4375aefcd1462adedf Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 2 Oct 2019 16:53:21 +0300 Subject: net/mlx5e: Fix ethtool self test: link speed Ethtool self test contains a test for link speed. This test reads the PTYS register and determines whether the current speed is valid or not. 
Change current implementation to use the function mlx5e_port_linkspeed() that does the same check and fails when speed is invalid. This code redundancy led to a bug when mlx5e_port_linkspeed() was updated with expanded speeds and the self test was not. Fixes: 2c81bfd5ae56 ("net/mlx5e: Move port speed code from en_ethtool.c to en/port.c") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 840ec945ccba..bbff8d8ded76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -35,6 +35,7 @@ #include #include #include "en.h" +#include "en/port.h" enum { MLX5E_ST_LINK_STATE, @@ -80,22 +81,12 @@ static int mlx5e_test_link_state(struct mlx5e_priv *priv) static int mlx5e_test_link_speed(struct mlx5e_priv *priv) { - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - u32 eth_proto_oper; - int i; + u32 speed; if (!netif_carrier_ok(priv->netdev)) return 1; - if (mlx5_query_port_ptys(priv->mdev, out, sizeof(out), MLX5_PTYS_EN, 1)) - return 1; - - eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; i++) { - if (eth_proto_oper & MLX5E_PROT_MASK(i)) - return 0; - } - return 1; + return mlx5e_port_linkspeed(priv->mdev, &speed); } struct mlx5ehdr { -- cgit v1.2.3 From 926b37f76fb0a22fe93c8873c819fd167180e85c Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 23 Oct 2019 12:57:54 +0300 Subject: net/mlx5e: Initialize on stack link modes bitmap Initialize link modes bitmap on stack before using it, otherwise the outcome of ethtool set link ksettings might have unexpected values. 
Fixes: 4b95840a6ced ("net/mlx5e: Fix matching of speed to PRM link modes") Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c5a9c20d7f00..327c93a7bd55 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1021,7 +1021,7 @@ static bool ext_link_mode_requested(const unsigned long *adver) { #define MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT ETHTOOL_LINK_MODE_50000baseKR_Full_BIT int size = __ETHTOOL_LINK_MODE_MASK_NBITS - MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT; - __ETHTOOL_DECLARE_LINK_MODE_MASK(modes); + __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = {0,}; bitmap_set(modes, MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT, size); return bitmap_intersects(modes, adver, __ETHTOOL_LINK_MODE_MASK_NBITS); -- cgit v1.2.3 From e19868efea0c103f23b4b7e986fd0a703822111f Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sun, 27 Oct 2019 16:39:15 +0200 Subject: net/mlx4_core: Dynamically set guaranteed amount of counters per VF Prior to this patch, the amount of counters guaranteed per VF in the resource tracker was MLX4_VF_COUNTERS_PER_PORT * MLX4_MAX_PORTS. It was set regardless if the VF was single or dual port. This caused several VFs to have no guaranteed counters although the system could satisfy their request. The fix is to dynamically guarantee counters, based on each VF specification. Fixes: 9de92c60beaa ("net/mlx4_core: Adjust counter grant policy in the resource tracker") Signed-off-by: Eran Ben Elisha Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 42 +++++++++++++--------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 4356f3a58002..1187ef1375e2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -471,12 +471,31 @@ void mlx4_init_quotas(struct mlx4_dev *dev) priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf]; } -static int get_max_gauranteed_vfs_counter(struct mlx4_dev *dev) +static int +mlx4_calc_res_counter_guaranteed(struct mlx4_dev *dev, + struct resource_allocator *res_alloc, + int vf) { - /* reduce the sink counter */ - return (dev->caps.max_counters - 1 - - (MLX4_PF_COUNTERS_PER_PORT * MLX4_MAX_PORTS)) - / MLX4_MAX_PORTS; + struct mlx4_active_ports actv_ports; + int ports, counters_guaranteed; + + /* For master, only allocate according to the number of phys ports */ + if (vf == mlx4_master_func_num(dev)) + return MLX4_PF_COUNTERS_PER_PORT * dev->caps.num_ports; + + /* calculate real number of ports for the VF */ + actv_ports = mlx4_get_active_ports(dev, vf); + ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports); + counters_guaranteed = ports * MLX4_VF_COUNTERS_PER_PORT; + + /* If we do not have enough counters for this VF, do not + * allocate any for it. '-1' to reduce the sink counter. 
+ */ + if ((res_alloc->res_reserved + counters_guaranteed) > + (dev->caps.max_counters - 1)) + return 0; + + return counters_guaranteed; } int mlx4_init_resource_tracker(struct mlx4_dev *dev) @@ -484,7 +503,6 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i, j; int t; - int max_vfs_guarantee_counter = get_max_gauranteed_vfs_counter(dev); priv->mfunc.master.res_tracker.slave_list = kcalloc(dev->num_slaves, sizeof(struct slave_list), @@ -603,16 +621,8 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) break; case RES_COUNTER: res_alloc->quota[t] = dev->caps.max_counters; - if (t == mlx4_master_func_num(dev)) - res_alloc->guaranteed[t] = - MLX4_PF_COUNTERS_PER_PORT * - MLX4_MAX_PORTS; - else if (t <= max_vfs_guarantee_counter) - res_alloc->guaranteed[t] = - MLX4_VF_COUNTERS_PER_PORT * - MLX4_MAX_PORTS; - else - res_alloc->guaranteed[t] = 0; + res_alloc->guaranteed[t] = + mlx4_calc_res_counter_guaranteed(dev, res_alloc, t); break; default: break; -- cgit v1.2.3 From e56bd641ca61beb92b135298d5046905f920b734 Mon Sep 17 00:00:00 2001 From: Jiangfeng Xiao Date: Mon, 28 Oct 2019 13:09:46 +0800 Subject: net: hisilicon: Fix ping latency when deal with high throughput This is due to an error in over-budget processing. When dealing with high throughput, the used buffers that exceed the budget are not cleaned up. In addition, it takes a lot of cycles to clean up the used buffer, and then the buffer where the valid data is located can take effect. Signed-off-by: Jiangfeng Xiao Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hip04_eth.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index ad6d91219daf..4606a7e4a6d1 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -237,6 +237,7 @@ struct hip04_priv { dma_addr_t rx_phys[RX_DESC_NUM]; unsigned int rx_head; unsigned int rx_buf_size; + unsigned int rx_cnt_remaining; struct device_node *phy_node; struct phy_device *phy; @@ -575,7 +576,6 @@ static int hip04_rx_poll(struct napi_struct *napi, int budget) struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi); struct net_device *ndev = priv->ndev; struct net_device_stats *stats = &ndev->stats; - unsigned int cnt = hip04_recv_cnt(priv); struct rx_desc *desc; struct sk_buff *skb; unsigned char *buf; @@ -588,8 +588,8 @@ static int hip04_rx_poll(struct napi_struct *napi, int budget) /* clean up tx descriptors */ tx_remaining = hip04_tx_reclaim(ndev, false); - - while (cnt && !last) { + priv->rx_cnt_remaining += hip04_recv_cnt(priv); + while (priv->rx_cnt_remaining && !last) { buf = priv->rx_buf[priv->rx_head]; skb = build_skb(buf, priv->rx_buf_size); if (unlikely(!skb)) { @@ -635,11 +635,13 @@ refill: hip04_set_recv_desc(priv, phys); priv->rx_head = RX_NEXT(priv->rx_head); - if (rx >= budget) + if (rx >= budget) { + --priv->rx_cnt_remaining; goto done; + } - if (--cnt == 0) - cnt = hip04_recv_cnt(priv); + if (--priv->rx_cnt_remaining == 0) + priv->rx_cnt_remaining += hip04_recv_cnt(priv); } if (!(priv->reg_inten & RCV_INT)) { @@ -724,6 +726,7 @@ static int hip04_mac_open(struct net_device *ndev) int i; priv->rx_head = 0; + priv->rx_cnt_remaining = 0; priv->tx_head = 0; priv->tx_tail = 0; hip04_reset_ppe(priv); -- cgit v1.2.3 From 2eb8d6d2910cfe3dc67dc056f26f3dd9c63d47cd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 28 Oct 2019 23:19:35 +0800 Subject: erspan: 
fix the tun_info options_len check for erspan The check for !md doesn't really work for ip_tunnel_info_opts(info) which only does info + 1. Also to avoid out-of-bounds access on info, it should ensure options_len is not less than erspan_metadata in both erspan_xmit() and ip6erspan_tunnel_xmit(). Fixes: 1a66a836da ("gre: add collect_md mode to ERSPAN tunnel") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 4 ++-- net/ipv6/ip6_gre.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 52690bb3e40f..10636fb6093e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -509,9 +509,9 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) key = &tun_info->key; if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) goto err_free_skb; - md = ip_tunnel_info_opts(tun_info); - if (!md) + if (tun_info->options_len < sizeof(*md)) goto err_free_skb; + md = ip_tunnel_info_opts(tun_info); /* ERSPAN has fixed 8 byte GRE header */ version = md->version; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 787d9f2a6e99..923034c52ce4 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -980,9 +980,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, dsfield = key->tos; if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) goto tx_err; - md = ip_tunnel_info_opts(tun_info); - if (!md) + if (tun_info->options_len < sizeof(*md)) goto tx_err; + md = ip_tunnel_info_opts(tun_info); tun_id = tunnel_id_to_key32(key->tun_id); if (md->version == 1) { -- cgit v1.2.3 From eadf52cf1852196a1363044dcda22fa5d7f296f7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 29 Oct 2019 01:24:32 +0800 Subject: vxlan: check tun_info options_len properly This patch is to improve the tun_info options_len by dropping the skb when TUNNEL_VXLAN_OPT is set but options_len is less than vxlan_metadata. This can avoid a potential out-of-bounds access on ip_tun_info. 
Fixes: ee122c79d422 ("vxlan: Flow based tunneling") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index fcf028220bca..ac5c597aa703 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2487,9 +2487,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, vni = tunnel_id_to_key32(info->key.tun_id); ifindex = 0; dst_cache = &info->dst_cache; - if (info->options_len && - info->key.tun_flags & TUNNEL_VXLAN_OPT) + if (info->key.tun_flags & TUNNEL_VXLAN_OPT) { + if (info->options_len < sizeof(*md)) + goto drop; md = ip_tunnel_info_opts(info); + } ttl = info->key.ttl; tos = info->key.tos; label = info->key.label; -- cgit v1.2.3 From f9f2933842ecd11e9df4b99d96540ee128266402 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 28 Oct 2019 15:11:31 -0700 Subject: MAINTAINERS: remove Dave Watson as TLS maintainer Dave's Facebook email address is not working, and my attempts to contact him are failing. Let's remove it to trim down the list of TLS maintainers. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e51a68bf8ca8..b6b6c75f7e6f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11407,7 +11407,6 @@ F: include/trace/events/tcp.h NETWORKING [TLS] M: Boris Pismenny M: Aviad Yehezkel -M: Dave Watson M: John Fastabend M: Daniel Borkmann M: Jakub Kicinski -- cgit v1.2.3 From 3b56be218f65e26bb651095d7a5b107f67a6c5c2 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 29 Oct 2019 09:53:18 +0800 Subject: net: fec_main: Use platform_get_irq_byname_optional() to avoid error message Failed to get irq using name is NOT fatal as driver will use index to get irq instead, use platform_get_irq_byname_optional() instead of platform_get_irq_byname() to avoid below error message during probe: [ 0.819312] fec 30be0000.ethernet: IRQ int0 not found [ 0.824433] fec 30be0000.ethernet: IRQ int1 not found [ 0.829539] fec 30be0000.ethernet: IRQ int2 not found Signed-off-by: Anson Huang Acked-by: Fugang Duan Reviewed-by: Stephen Boyd Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/fec_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index d4d4c72adf49..22c01b224baa 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3558,7 +3558,7 @@ fec_probe(struct platform_device *pdev) for (i = 0; i < irq_cnt; i++) { snprintf(irq_name, sizeof(irq_name), "int%d", i); - irq = platform_get_irq_byname(pdev, irq_name); + irq = platform_get_irq_byname_optional(pdev, irq_name); if (irq < 0) irq = platform_get_irq(pdev, i); if (irq < 0) { -- cgit v1.2.3 From b86bcb299092c1d5cd30f6188d58f73526ec9ada Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 29 Oct 2019 09:53:19 +0800 Subject: net: fec_ptp: Use platform_get_irq_xxx_optional() to avoid error message Use platform_get_irq_byname_optional() and platform_get_irq_optional() instead of platform_get_irq_byname() and platform_get_irq() for optional IRQs to avoid below error message during probe: [ 0.795803] fec 30be0000.ethernet: IRQ pps not found [ 0.800787] fec 30be0000.ethernet: IRQ index 3 not found Signed-off-by: Anson Huang Acked-by: Fugang Duan Reviewed-by: Stephen Boyd Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/fec_ptp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 19e2365be7d8..945643c02615 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -600,9 +600,9 @@ void fec_ptp_init(struct platform_device *pdev, int irq_idx) INIT_DELAYED_WORK(&fep->time_keep, fec_time_keep); - irq = platform_get_irq_byname(pdev, "pps"); + irq = platform_get_irq_byname_optional(pdev, "pps"); if (irq < 0) - irq = platform_get_irq(pdev, irq_idx); + irq = platform_get_irq_optional(pdev, irq_idx); /* Failure to get an irq is not fatal, * only the PTP_CLOCK_PPS clock events should stop */ -- cgit v1.2.3 From ad9bd8daf2f9938572b0604e1280fefa8f338581 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 29 Oct 2019 09:12:32 +0000 Subject: bonding: fix using uninitialized mode_lock When a bonding interface is being created, it sets up its mode and options. At that moment, it uses mode_lock so mode_lock should be initialized before that moment. rtnl_newlink() rtnl_create_link() alloc_netdev_mqs() ->setup() //bond_setup() ->newlink //bond_newlink bond_changelink() register_netdevice() ->ndo_init() //bond_init() After commit 089bca2caed0 ("bonding: use dynamic lockdep key instead of subclass"), mode_lock is initialized in bond_init(). So in bond_changelink(), an uninitialized mode_lock can be used. mode_lock should be initialized in bond_setup(). This patch partially reverts commit 089bca2caed0 ("bonding: use dynamic lockdep key instead of subclass") Test command: ip link add bond0 type bond mode 802.3ad lacp_rate 0 Splat looks like: [ 60.615127] INFO: trying to register non-static key. [ 60.615900] the code is fine but needs lockdep annotation. [ 60.616697] turning off the locking correctness validator. 
[ 60.617490] CPU: 1 PID: 957 Comm: ip Not tainted 5.4.0-rc3+ #109 [ 60.618350] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 60.619481] Call Trace: [ 60.619918] dump_stack+0x7c/0xbb [ 60.620453] register_lock_class+0x1215/0x14d0 [ 60.621131] ? alloc_netdev_mqs+0x7b3/0xcc0 [ 60.621771] ? is_bpf_text_address+0x86/0xf0 [ 60.622416] ? is_dynamic_key+0x230/0x230 [ 60.623032] ? unwind_get_return_address+0x5f/0xa0 [ 60.623757] ? create_prof_cpu_mask+0x20/0x20 [ 60.624408] ? arch_stack_walk+0x83/0xb0 [ 60.625023] __lock_acquire+0xd8/0x3de0 [ 60.625616] ? stack_trace_save+0x82/0xb0 [ 60.626225] ? stack_trace_consume_entry+0x160/0x160 [ 60.626957] ? deactivate_slab.isra.80+0x2c5/0x800 [ 60.627668] ? register_lock_class+0x14d0/0x14d0 [ 60.628380] ? alloc_netdev_mqs+0x7b3/0xcc0 [ 60.629020] ? save_stack+0x69/0x80 [ 60.629574] ? save_stack+0x19/0x80 [ 60.630121] ? __kasan_kmalloc.constprop.4+0xa0/0xd0 [ 60.630859] ? __kmalloc_node+0x16f/0x480 [ 60.631472] ? alloc_netdev_mqs+0x7b3/0xcc0 [ 60.632121] ? rtnl_create_link+0x2ed/0xad0 [ 60.634388] ? __rtnl_newlink+0xad4/0x11b0 [ 60.635024] lock_acquire+0x164/0x3b0 [ 60.635608] ? bond_3ad_update_lacp_rate+0x91/0x200 [bonding] [ 60.636463] _raw_spin_lock_bh+0x38/0x70 [ 60.637084] ? bond_3ad_update_lacp_rate+0x91/0x200 [bonding] [ 60.637930] bond_3ad_update_lacp_rate+0x91/0x200 [bonding] [ 60.638753] ? bond_3ad_lacpdu_recv+0xb30/0xb30 [bonding] [ 60.639552] ? bond_opt_get_val+0x180/0x180 [bonding] [ 60.640307] ? ___slab_alloc+0x5aa/0x610 [ 60.640925] bond_option_lacp_rate_set+0x71/0x140 [bonding] [ 60.641751] __bond_opt_set+0x1ff/0xbb0 [bonding] [ 60.643217] ? kasan_unpoison_shadow+0x30/0x40 [ 60.643924] bond_changelink+0x9a4/0x1700 [bonding] [ 60.644653] ? memset+0x1f/0x40 [ 60.742941] ? bond_slave_changelink+0x1a0/0x1a0 [bonding] [ 60.752694] ? alloc_netdev_mqs+0x8ea/0xcc0 [ 60.753330] ? 
rtnl_create_link+0x2ed/0xad0 [ 60.753964] bond_newlink+0x1e/0x60 [bonding] [ 60.754612] __rtnl_newlink+0xb9f/0x11b0 [ ... ] Reported-by: syzbot+8da67f407bcba2c72e6e@syzkaller.appspotmail.com Reported-by: syzbot+0d083911ab18b710da71@syzkaller.appspotmail.com Fixes: 089bca2caed0 ("bonding: use dynamic lockdep key instead of subclass") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a48950b81434..480f9459b402 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4284,6 +4284,7 @@ void bond_setup(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); + spin_lock_init(&bond->mode_lock); bond->params = bonding_defaults; /* Initialize pointers */ @@ -4756,7 +4757,6 @@ static int bond_init(struct net_device *bond_dev) if (!bond->wq) return -ENOMEM; - spin_lock_init(&bond->mode_lock); spin_lock_init(&bond->stats_lock); lockdep_register_key(&bond->stats_lock_key); lockdep_set_class(&bond->stats_lock, &bond->stats_lock_key); -- cgit v1.2.3 From 301428ea3708188dc4a243e6e6b46c03b46a0fbc Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 29 Oct 2019 12:41:26 +0100 Subject: net/smc: fix refcounting for non-blocking connect() If a nonblocking socket is immediately closed after connect(), the connect worker may not have started. This results in a refcount problem, since sock_hold() is called from the connect worker. This patch moves the sock_hold in front of the connect worker scheduling. Reported-by: syzbot+4c063e6dea39e4b79f29@syzkaller.appspotmail.com Fixes: 50717a37db03 ("net/smc: nonblocking connect rework") Reviewed-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/af_smc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index cea3c36ea0da..47946f489fd4 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -707,8 +707,6 @@ static int __smc_connect(struct smc_sock *smc) int smc_type; int rc = 0; - sock_hold(&smc->sk); /* sock put in passive closing */ - if (smc->use_fallback) return smc_connect_fallback(smc, smc->fallback_rsn); @@ -853,6 +851,8 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, rc = kernel_connect(smc->clcsock, addr, alen, flags); if (rc && rc != -EINPROGRESS) goto out; + + sock_hold(&smc->sk); /* sock put in passive closing */ if (flags & O_NONBLOCK) { if (schedule_work(&smc->connect_work)) smc->connect_nonblock = 1; -- cgit v1.2.3 From 8b73018fe44521c1cf59d7bac53624c87d3f10e2 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 29 Oct 2019 13:59:32 +0200 Subject: net: rtnetlink: fix a typo fbd -> fdb A simple typo fix in the nl error message (fbd -> fdb). CC: David Ahern Fixes: 8c6e137fbc7f ("rtnetlink: Update rtnl_fdb_dump for strict data checking") Signed-off-by: Nikolay Aleksandrov Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ba4b4048ec3e..c81cd80114d9 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3917,7 +3917,7 @@ static int valid_fdb_dump_strict(const struct nlmsghdr *nlh, ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || ndm->ndm_flags || ndm->ndm_type) { - NL_SET_ERR_MSG(extack, "Invalid values in header for fbd dump request"); + NL_SET_ERR_MSG(extack, "Invalid values in header for fdb dump request"); return -EINVAL; } -- cgit v1.2.3 From 59cd826fb5e7889515bf5771e295e0624c348571 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 22 Oct 2019 21:56:43 +0300 Subject: drm/i915: Fix PCH reference clock for FDI on HSW/BDW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The change to skip the PCH reference initialization during fastboot did end up breaking FDI. To fix that let's try to do the PCH reference init whenever we're disabling a DPLL that was using said reference previously. 
Cc: stable@vger.kernel.org Tested-by: Andrija Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112084 Fixes: b16c7ed95caf ("drm/i915: Do not touch the PCH SSC reference if a PLL is using it") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191022185643.1483-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit dd5279c71405533d4ddbb9453effc60f0f5bf211) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display.c | 11 ++++++----- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 15 +++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 2 ++ 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index aa54bb22796d..dfff6f4357b8 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -9315,7 +9315,6 @@ static bool wrpll_uses_pch_ssc(struct drm_i915_private *dev_priv, static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv) { struct intel_encoder *encoder; - bool pch_ssc_in_use = false; bool has_fdi = false; for_each_intel_encoder(&dev_priv->drm, encoder) { @@ -9343,22 +9342,24 @@ static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv) * clock hierarchy. That would also allow us to do * clock bending finally. 
*/ + dev_priv->pch_ssc_use = 0; + if (spll_uses_pch_ssc(dev_priv)) { DRM_DEBUG_KMS("SPLL using PCH SSC\n"); - pch_ssc_in_use = true; + dev_priv->pch_ssc_use |= BIT(DPLL_ID_SPLL); } if (wrpll_uses_pch_ssc(dev_priv, DPLL_ID_WRPLL1)) { DRM_DEBUG_KMS("WRPLL1 using PCH SSC\n"); - pch_ssc_in_use = true; + dev_priv->pch_ssc_use |= BIT(DPLL_ID_WRPLL1); } if (wrpll_uses_pch_ssc(dev_priv, DPLL_ID_WRPLL2)) { DRM_DEBUG_KMS("WRPLL2 using PCH SSC\n"); - pch_ssc_in_use = true; + dev_priv->pch_ssc_use |= BIT(DPLL_ID_WRPLL2); } - if (pch_ssc_in_use) + if (dev_priv->pch_ssc_use) return; if (has_fdi) { diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index b8148f838354..d5a298c3c83b 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -525,16 +525,31 @@ static void hsw_ddi_wrpll_disable(struct drm_i915_private *dev_priv, val = I915_READ(WRPLL_CTL(id)); I915_WRITE(WRPLL_CTL(id), val & ~WRPLL_PLL_ENABLE); POSTING_READ(WRPLL_CTL(id)); + + /* + * Try to set up the PCH reference clock once all DPLLs + * that depend on it have been shut down. + */ + if (dev_priv->pch_ssc_use & BIT(id)) + intel_init_pch_refclk(dev_priv); } static void hsw_ddi_spll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { + enum intel_dpll_id id = pll->info->id; u32 val; val = I915_READ(SPLL_CTL); I915_WRITE(SPLL_CTL, val & ~SPLL_PLL_ENABLE); POSTING_READ(SPLL_CTL); + + /* + * Try to set up the PCH reference clock once all DPLLs + * that depend on it have been shut down. 
+ */ + if (dev_priv->pch_ssc_use & BIT(id)) + intel_init_pch_refclk(dev_priv); } static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 772154e4073e..953e1d12c23c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1723,6 +1723,8 @@ struct drm_i915_private { struct work_struct idle_work; } gem; + u8 pch_ssc_use; + /* For i945gm vblank irq vs. C3 workaround */ struct { struct work_struct work; -- cgit v1.2.3 From 1d9b0b66c3ef03e42db63068e1a4e7250992e2b1 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 29 Oct 2019 21:39:16 -0700 Subject: MAINTAINERS: Change to my personal email address I'm leaving SiFive in a bit less than two weeks, which means I'll be losing my @sifive email address. I don't have my new email address yet, so I'm switching over to my personal address instead. Signed-off-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt Signed-off-by: Paul Walmsley --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c6c34d04ce95..f97f35163033 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13906,7 +13906,7 @@ F: drivers/mtd/nand/raw/r852.h RISC-V ARCHITECTURE M: Paul Walmsley -M: Palmer Dabbelt +M: Palmer Dabbelt M: Albert Ou L: linux-riscv@lists.infradead.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git @@ -14783,7 +14783,7 @@ F: drivers/media/usb/siano/ F: drivers/media/mmc/siano/ SIFIVE DRIVERS -M: Palmer Dabbelt +M: Palmer Dabbelt M: Paul Walmsley L: linux-riscv@lists.infradead.org T: git git://github.com/sifive/riscv-linux.git @@ -14793,7 +14793,7 @@ N: sifive SIFIVE FU540 SYSTEM-ON-CHIP M: Paul Walmsley -M: Palmer Dabbelt +M: Palmer Dabbelt L: linux-riscv@lists.infradead.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/pjw/sifive.git S: Supported -- cgit v1.2.3 From 1251dab9e0a2c4d0d2d48370ba5baa095a5e8774 Mon Sep 17 
00:00:00 2001 From: Johan Hovold Date: Tue, 29 Oct 2019 11:23:53 +0100 Subject: USB: serial: whiteheat: fix potential slab corruption Fix a user-controlled slab buffer overflow due to a missing sanity check on the bulk-out transfer buffer used for control requests. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191029102354.2733-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/whiteheat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 79314d8c94a4..76cabcb30d21 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -559,6 +559,10 @@ static int firm_send_command(struct usb_serial_port *port, __u8 command, command_port = port->serial->port[COMMAND_PORT]; command_info = usb_get_serial_port_data(command_port); + + if (command_port->bulk_out_size < datasize + 1) + return -EIO; + mutex_lock(&command_info->mutex); command_info->command_finished = false; -- cgit v1.2.3 From 84968291d7924261c6a0624b9a72f952398e258b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 29 Oct 2019 11:23:54 +0100 Subject: USB: serial: whiteheat: fix line-speed endianness Add missing endianness conversion when setting the line speed so that this driver might work also on big-endian machines. Also use an unsigned format specifier in the corresponding debug message. 
Signed-off-by: Johan Hovold Cc: stable Link: https://lore.kernel.org/r/20191029102354.2733-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/whiteheat.c | 9 ++++++--- drivers/usb/serial/whiteheat.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 76cabcb30d21..ca3bd58f2025 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -636,6 +636,7 @@ static void firm_setup_port(struct tty_struct *tty) struct device *dev = &port->dev; struct whiteheat_port_settings port_settings; unsigned int cflag = tty->termios.c_cflag; + speed_t baud; port_settings.port = port->port_number + 1; @@ -696,11 +697,13 @@ static void firm_setup_port(struct tty_struct *tty) dev_dbg(dev, "%s - XON = %2x, XOFF = %2x\n", __func__, port_settings.xon, port_settings.xoff); /* get the baud rate wanted */ - port_settings.baud = tty_get_baud_rate(tty); - dev_dbg(dev, "%s - baud rate = %d\n", __func__, port_settings.baud); + baud = tty_get_baud_rate(tty); + port_settings.baud = cpu_to_le32(baud); + dev_dbg(dev, "%s - baud rate = %u\n", __func__, baud); /* fixme: should set validated settings */ - tty_encode_baud_rate(tty, port_settings.baud, port_settings.baud); + tty_encode_baud_rate(tty, baud, baud); + /* handle any settings that aren't specified in the tty structure */ port_settings.lloop = 0; diff --git a/drivers/usb/serial/whiteheat.h b/drivers/usb/serial/whiteheat.h index 00398149cd8d..269e727a92f9 100644 --- a/drivers/usb/serial/whiteheat.h +++ b/drivers/usb/serial/whiteheat.h @@ -87,7 +87,7 @@ struct whiteheat_simple { struct whiteheat_port_settings { __u8 port; /* port number (1 to N) */ - __u32 baud; /* any value 7 - 460800, firmware calculates + __le32 baud; /* any value 7 - 460800, firmware calculates best fit; arrives little endian */ __u8 bits; /* 5, 6, 7, or 8 */ __u8 stop; /* 1 or 2, default 1 (2 = 1.5 if bits = 5) */ -- cgit v1.2.3 From 
ec649fed66bb242cca145ab364485c5a126efc53 Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Mon, 21 Oct 2019 16:50:45 +0900 Subject: nl80211: Disallow setting of HT for channel 14 This patch disables setting of HT20 and more for channel 14 because the channel is only for IEEE 802.11b. The patch for net/wireless/util.c was unit-tested. The patch for net/wireless/chan.c was tested with iw command. Before this patch. $ sudo iw dev set channel 14 HT20 $ After this patch. $ sudo iw dev set channel 14 HT20 kernel reports: invalid channel definition command failed: Invalid argument (-22) $ Signed-off-by: Masashi Honma Link: https://lore.kernel.org/r/20191021075045.2719-1-masashi.honma@gmail.com [clean up the code, use != instead of equivalent >] Signed-off-by: Johannes Berg --- net/wireless/chan.c | 5 +++++ net/wireless/util.c | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/net/wireless/chan.c b/net/wireless/chan.c index e851cafd8e2f..fcac5c6366e1 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -204,6 +204,11 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) return false; } + /* channel 14 is only for IEEE 802.11b */ + if (chandef->center_freq1 == 2484 && + chandef->width != NL80211_CHAN_WIDTH_20_NOHT) + return false; + if (cfg80211_chandef_is_edmg(chandef) && !cfg80211_edmg_chandef_valid(chandef)) return false; diff --git a/net/wireless/util.c b/net/wireless/util.c index 419eb12c1e93..5b4ed5bbc542 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1559,7 +1559,8 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef, } if (freq == 2484) { - if (chandef->width > NL80211_CHAN_WIDTH_40) + /* channel 14 is only for IEEE 802.11b */ + if (chandef->width != NL80211_CHAN_WIDTH_20_NOHT) return false; *op_class = 82; /* channel 14 */ -- cgit v1.2.3 From 1fab1b89e2e8f01204a9c05a39fd0b6411a48593 Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Tue, 29 Oct 2019 10:30:03 +0100 Subject: 
nl80211: fix validation of mesh path nexthop Mesh path nexthop should be an ethernet address, but current validation checks against 4 byte integers. Cc: stable@vger.kernel.org Fixes: 2ec600d672e74 ("nl80211/cfg80211: support for mesh, sta dumping") Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20191029093003.10355-1-markus.theil@tu-ilmenau.de Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4453dd375de9..7b72286922f7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -393,7 +393,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MNTR_FLAGS] = { /* NLA_NESTED can't be empty */ }, [NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_MESH_ID_LEN }, - [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 }, + [NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT, [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 }, [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED }, -- cgit v1.2.3 From 565d454280f85c0a4f204fb160f3d90f8418c080 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 23 Oct 2019 21:59:41 +0800 Subject: iommu/ipmmu-vmsa: Remove dev_err() on platform_get_irq() failure platform_get_irq() will call dev_err() itself on failure, so there is no need for the driver to also do this. This is detected by coccinelle.
Signed-off-by: YueHaibing Reviewed-by: Geert Uytterhoeven Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 237103465b82..2639fc718117 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1105,10 +1105,8 @@ static int ipmmu_probe(struct platform_device *pdev) /* Root devices have mandatory IRQs */ if (ipmmu_is_root(mmu)) { irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, "no IRQ found\n"); + if (irq < 0) return irq; - } ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0, dev_name(&pdev->dev), mmu); -- cgit v1.2.3 From ad3e8da2d422c63c13819a53d3c5ea9312cc0b9d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 21 Oct 2019 17:17:21 +0200 Subject: iommu/amd: Apply the same IVRS IOAPIC workaround to Acer Aspire A315-41 Acer Aspire A315-41 requires the very same workaround as the existing quirk for Dell Latitude 5495. Add the new entry for that. 
BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1137799 Signed-off-by: Takashi Iwai Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_quirks.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/iommu/amd_iommu_quirks.c b/drivers/iommu/amd_iommu_quirks.c index c235f79b7a20..5120ce4fdce3 100644 --- a/drivers/iommu/amd_iommu_quirks.c +++ b/drivers/iommu/amd_iommu_quirks.c @@ -73,6 +73,19 @@ static const struct dmi_system_id ivrs_quirks[] __initconst = { }, .driver_data = (void *)&ivrs_ioapic_quirks[DELL_LATITUDE_5495], }, + { + /* + * Acer Aspire A315-41 requires the very same workaround as + * Dell Latitude 5495 + */ + .callback = ivrs_ioapic_quirk_cb, + .ident = "Acer Aspire A315-41", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire A315-41"), + }, + .driver_data = (void *)&ivrs_ioapic_quirks[DELL_LATITUDE_5495], + }, { .callback = ivrs_ioapic_quirk_cb, .ident = "Lenovo ideapad 330S-15ARR", -- cgit v1.2.3 From 160c63f909ffbc797c0bbe23310ac1eaf2349d2f Mon Sep 17 00:00:00 2001 From: John Donnelly Date: Mon, 21 Oct 2019 21:48:10 -0500 Subject: iommu/vt-d: Fix panic after kexec -p for kdump This cures a panic on restart after a kexec operation on 5.3 and 5.4 kernels. The underlying state of the iommu registers (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED) on a restart results in a domain being marked as "DEFER_DEVICE_DOMAIN_INFO" that produces an Oops in identity_mapping(). 
[ 43.654737] BUG: kernel NULL pointer dereference, address: 0000000000000056 [ 43.655720] #PF: supervisor read access in kernel mode [ 43.655720] #PF: error_code(0x0000) - not-present page [ 43.655720] PGD 0 P4D 0 [ 43.655720] Oops: 0000 [#1] SMP PTI [ 43.655720] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.3.2-1940.el8uek.x86_64 #1 [ 43.655720] Hardware name: Oracle Corporation ORACLE SERVER X5-2/ASM,MOTHERBOARD,1U, BIOS 30140300 09/20/2018 [ 43.655720] RIP: 0010:iommu_need_mapping+0x29/0xd0 [ 43.655720] Code: 00 0f 1f 44 00 00 48 8b 97 70 02 00 00 48 83 fa ff 74 53 48 8d 4a ff b8 01 00 00 00 48 83 f9 fd 76 01 c3 48 8b 35 7f 58 e0 01 <48> 39 72 58 75 f2 55 48 89 e5 41 54 53 48 8b 87 28 02 00 00 4c 8b [ 43.655720] RSP: 0018:ffffc9000001b9b0 EFLAGS: 00010246 [ 43.655720] RAX: 0000000000000001 RBX: 0000000000001000 RCX: fffffffffffffffd [ 43.655720] RDX: fffffffffffffffe RSI: ffff8880719b8000 RDI: ffff8880477460b0 [ 43.655720] RBP: ffffc9000001b9e8 R08: 0000000000000000 R09: ffff888047c01700 [ 43.655720] R10: 00002194036fc692 R11: 0000000000000000 R12: 0000000000000000 [ 43.655720] R13: ffff8880477460b0 R14: 0000000000000cc0 R15: ffff888072d2b558 [ 43.655720] FS: 0000000000000000(0000) GS:ffff888071c00000(0000) knlGS:0000000000000000 [ 43.655720] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 43.655720] CR2: 0000000000000056 CR3: 000000007440a002 CR4: 00000000001606b0 [ 43.655720] Call Trace: [ 43.655720] ? intel_alloc_coherent+0x2a/0x180 [ 43.655720] ? __schedule+0x2c2/0x650 [ 43.655720] dma_alloc_attrs+0x8c/0xd0 [ 43.655720] dma_pool_alloc+0xdf/0x200 [ 43.655720] ehci_qh_alloc+0x58/0x130 [ 43.655720] ehci_setup+0x287/0x7ba [ 43.655720] ? 
_dev_info+0x6c/0x83 [ 43.655720] ehci_pci_setup+0x91/0x436 [ 43.655720] usb_add_hcd.cold.48+0x1d4/0x754 [ 43.655720] usb_hcd_pci_probe+0x2bc/0x3f0 [ 43.655720] ehci_pci_probe+0x39/0x40 [ 43.655720] local_pci_probe+0x47/0x80 [ 43.655720] pci_device_probe+0xff/0x1b0 [ 43.655720] really_probe+0xf5/0x3a0 [ 43.655720] driver_probe_device+0xbb/0x100 [ 43.655720] device_driver_attach+0x58/0x60 [ 43.655720] __driver_attach+0x8f/0x150 [ 43.655720] ? device_driver_attach+0x60/0x60 [ 43.655720] bus_for_each_dev+0x74/0xb0 [ 43.655720] driver_attach+0x1e/0x20 [ 43.655720] bus_add_driver+0x151/0x1f0 [ 43.655720] ? ehci_hcd_init+0xb2/0xb2 [ 43.655720] ? do_early_param+0x95/0x95 [ 43.655720] driver_register+0x70/0xc0 [ 43.655720] ? ehci_hcd_init+0xb2/0xb2 [ 43.655720] __pci_register_driver+0x57/0x60 [ 43.655720] ehci_pci_init+0x6a/0x6c [ 43.655720] do_one_initcall+0x4a/0x1fa [ 43.655720] ? do_early_param+0x95/0x95 [ 43.655720] kernel_init_freeable+0x1bd/0x262 [ 43.655720] ? rest_init+0xb0/0xb0 [ 43.655720] kernel_init+0xe/0x110 [ 43.655720] ret_from_fork+0x24/0x50 Fixes: 8af46c784ecfe ("iommu/vt-d: Implement is_attach_deferred iommu ops entry") Cc: stable@vger.kernel.org # v5.3+ Signed-off-by: John Donnelly Reviewed-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 79e35b3180ac..6db6d969e31c 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2794,7 +2794,7 @@ static int identity_mapping(struct device *dev) struct device_domain_info *info; info = dev->archdata.iommu; - if (info && info != DUMMY_DEVICE_DOMAIN_INFO) + if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO) return (info->domain == si_domain); return 0; -- cgit v1.2.3 From df4028658f9dd7f82fd190e9db5bd82c05f94625 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 28 Oct 2019 12:17:44 +0100 Subject: staging: Add 
VirtualBox guest shared folder (vboxsf) support VirtualBox hosts can share folders with guests, this commit adds a VFS driver implementing the Linux-guest side of this, allowing folders exported by the host to be mounted under Linux. This driver depends on the guest <-> host IPC functions exported by the vboxguest driver. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20191028111744.143863-2-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 6 + drivers/staging/Kconfig | 2 + drivers/staging/Makefile | 1 + drivers/staging/vboxsf/Kconfig | 10 + drivers/staging/vboxsf/Makefile | 5 + drivers/staging/vboxsf/TODO | 7 + drivers/staging/vboxsf/dir.c | 418 ++++++++++++++ drivers/staging/vboxsf/file.c | 370 +++++++++++++ drivers/staging/vboxsf/shfl_hostintf.h | 901 +++++++++++++++++++++++++++++++ drivers/staging/vboxsf/super.c | 499 +++++++++++++++++ drivers/staging/vboxsf/utils.c | 550 +++++++++++++++++++ drivers/staging/vboxsf/vboxsf_wrappers.c | 371 +++++++++++++ drivers/staging/vboxsf/vfsmod.h | 138 +++++ 13 files changed, 3278 insertions(+) create mode 100644 drivers/staging/vboxsf/Kconfig create mode 100644 drivers/staging/vboxsf/Makefile create mode 100644 drivers/staging/vboxsf/TODO create mode 100644 drivers/staging/vboxsf/dir.c create mode 100644 drivers/staging/vboxsf/file.c create mode 100644 drivers/staging/vboxsf/shfl_hostintf.h create mode 100644 drivers/staging/vboxsf/super.c create mode 100644 drivers/staging/vboxsf/utils.c create mode 100644 drivers/staging/vboxsf/vboxsf_wrappers.c create mode 100644 drivers/staging/vboxsf/vfsmod.h diff --git a/MAINTAINERS b/MAINTAINERS index c6c34d04ce95..71789e42ed17 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17335,6 +17335,12 @@ F: include/linux/vbox_utils.h F: include/uapi/linux/vbox*.h F: drivers/virt/vboxguest/ +VIRTUAL BOX SHARED FOLDER VFS DRIVER: +M: Hans de Goede +L: linux-fsdevel@vger.kernel.org +S: Maintained +F: drivers/staging/vboxsf/* + VIRTUAL SERIO DEVICE DRIVER M: 
Stephen Chandler Paul S: Maintained diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 6f1fa4c849a1..927d29eb92c6 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -125,4 +125,6 @@ source "drivers/staging/exfat/Kconfig" source "drivers/staging/qlge/Kconfig" +source "drivers/staging/vboxsf/Kconfig" + endif # STAGING diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index a90f9b308c8d..f01f04199073 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -53,3 +53,4 @@ obj-$(CONFIG_UWB) += uwb/ obj-$(CONFIG_USB_WUSB) += wusbcore/ obj-$(CONFIG_EXFAT_FS) += exfat/ obj-$(CONFIG_QLGE) += qlge/ +obj-$(CONFIG_VBOXSF_FS) += vboxsf/ diff --git a/drivers/staging/vboxsf/Kconfig b/drivers/staging/vboxsf/Kconfig new file mode 100644 index 000000000000..b84586ae08b3 --- /dev/null +++ b/drivers/staging/vboxsf/Kconfig @@ -0,0 +1,10 @@ +config VBOXSF_FS + tristate "VirtualBox guest shared folder (vboxsf) support" + depends on X86 && VBOXGUEST + select NLS + help + VirtualBox hosts can share folders with guests, this driver + implements the Linux-guest side of this allowing folders exported + by the host to be mounted under Linux. + + If you want to use shared folders in VirtualBox guests, answer Y or M. 
diff --git a/drivers/staging/vboxsf/Makefile b/drivers/staging/vboxsf/Makefile new file mode 100644 index 000000000000..9e4328e79623 --- /dev/null +++ b/drivers/staging/vboxsf/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: MIT + +obj-$(CONFIG_VBOXSF_FS) += vboxsf.o + +vboxsf-y := dir.o file.o utils.o vboxsf_wrappers.o super.o diff --git a/drivers/staging/vboxsf/TODO b/drivers/staging/vboxsf/TODO new file mode 100644 index 000000000000..8b9193d0d4f0 --- /dev/null +++ b/drivers/staging/vboxsf/TODO @@ -0,0 +1,7 @@ +TODO: +- Find a file-system developer to review this and give their Reviewed-By +- Address any items coming up during review +- Move to fs/vboxfs + +Please send any patches to Greg Kroah-Hartman +and Hans de Goede diff --git a/drivers/staging/vboxsf/dir.c b/drivers/staging/vboxsf/dir.c new file mode 100644 index 000000000000..f260b5cc1646 --- /dev/null +++ b/drivers/staging/vboxsf/dir.c @@ -0,0 +1,418 @@ +// SPDX-License-Identifier: MIT +/* + * VirtualBox Guest Shared Folders support: Directory inode and file operations + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include +#include +#include "vfsmod.h" + +static int vboxsf_dir_open(struct inode *inode, struct file *file) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb); + struct shfl_createparms params = {}; + struct vboxsf_dir_info *sf_d; + int err; + + sf_d = vboxsf_dir_info_alloc(); + if (!sf_d) + return -ENOMEM; + + params.handle = SHFL_HANDLE_NIL; + params.create_flags = SHFL_CF_DIRECTORY | SHFL_CF_ACT_OPEN_IF_EXISTS | + SHFL_CF_ACT_FAIL_IF_NEW | SHFL_CF_ACCESS_READ; + + err = vboxsf_create_at_dentry(file_dentry(file), &params); + if (err) + goto err_free_dir_info; + + if (params.result != SHFL_FILE_EXISTS) { + err = -ENOENT; + goto err_close; + } + + err = vboxsf_dir_read_all(sbi, sf_d, params.handle); + if (err) + goto err_close; + + vboxsf_close(sbi->root, params.handle); + file->private_data = sf_d; + return 0; + +err_close: + vboxsf_close(sbi->root, params.handle);
+err_free_dir_info: + vboxsf_dir_info_free(sf_d); + return err; +} + +static int vboxsf_dir_release(struct inode *inode, struct file *file) +{ + if (file->private_data) + vboxsf_dir_info_free(file->private_data); + + return 0; +} + +static unsigned int vboxsf_get_d_type(u32 mode) +{ + unsigned int d_type; + + switch (mode & SHFL_TYPE_MASK) { + case SHFL_TYPE_FIFO: + d_type = DT_FIFO; + break; + case SHFL_TYPE_DEV_CHAR: + d_type = DT_CHR; + break; + case SHFL_TYPE_DIRECTORY: + d_type = DT_DIR; + break; + case SHFL_TYPE_DEV_BLOCK: + d_type = DT_BLK; + break; + case SHFL_TYPE_FILE: + d_type = DT_REG; + break; + case SHFL_TYPE_SYMLINK: + d_type = DT_LNK; + break; + case SHFL_TYPE_SOCKET: + d_type = DT_SOCK; + break; + case SHFL_TYPE_WHITEOUT: + d_type = DT_WHT; + break; + default: + d_type = DT_UNKNOWN; + break; + } + return d_type; +} + +static bool vboxsf_dir_emit(struct file *dir, struct dir_context *ctx) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(file_inode(dir)->i_sb); + struct vboxsf_dir_info *sf_d = dir->private_data; + struct shfl_dirinfo *info; + struct vboxsf_dir_buf *b; + unsigned int d_type; + loff_t i, cur = 0; + ino_t fake_ino; + size_t size; + int err; + + list_for_each_entry(b, &sf_d->info_list, head) { +try_next_entry: + if (ctx->pos >= cur + b->entries) { + cur += b->entries; + continue; + } + + /* + * Note the vboxsf_dir_info objects we are iterating over here + * are variable sized, so the info pointer may end up being + * unaligned. This is how we get the data from the host. + * Since vboxsf is only supported on x86 machines this is not + * a problem. + */ + for (i = 0, info = b->buf; i < ctx->pos - cur; i++) { + size = offsetof(struct shfl_dirinfo, name.string) + + info->name.size; + info = (struct shfl_dirinfo *)((uintptr_t)info + size); + } + + /* Info now points to the right entry, emit it. 
*/ + d_type = vboxsf_get_d_type(info->info.attr.mode); + + /* + * On 32 bit systems pos is 64 signed, while ino is 32 bit + * unsigned so fake_ino may overflow, check for this. + */ + if ((ino_t)(ctx->pos + 1) != (u64)(ctx->pos + 1)) { + vbg_err("vboxsf: fake ino overflow, truncating dir\n"); + return false; + } + fake_ino = ctx->pos + 1; + + if (sbi->nls) { + char d_name[NAME_MAX]; + + err = vboxsf_nlscpy(sbi, d_name, NAME_MAX, + info->name.string.utf8, + info->name.length); + if (err) { + /* skip erroneous entry and proceed */ + ctx->pos += 1; + goto try_next_entry; + } + + return dir_emit(ctx, d_name, strlen(d_name), + fake_ino, d_type); + } + + return dir_emit(ctx, info->name.string.utf8, info->name.length, + fake_ino, d_type); + } + + return false; +} + +static int vboxsf_dir_iterate(struct file *dir, struct dir_context *ctx) +{ + bool keep_iterating; + + for (keep_iterating = true; keep_iterating; ctx->pos += 1) + keep_iterating = vboxsf_dir_emit(dir, ctx); + + return 0; +} + +const struct file_operations vboxsf_dir_fops = { + .open = vboxsf_dir_open, + .iterate = vboxsf_dir_iterate, + .release = vboxsf_dir_release, + .read = generic_read_dir, + .llseek = generic_file_llseek, +}; + +/* + * This is called during name resolution/lookup to check if the @dentry in + * the cache is still valid. the job is handled by vboxsf_inode_revalidate. 
+ */ +static int vboxsf_dentry_revalidate(struct dentry *dentry, unsigned int flags) +{ + if (flags & LOOKUP_RCU) + return -ECHILD; + + if (d_really_is_positive(dentry)) + return vboxsf_inode_revalidate(dentry) == 0; + else + return vboxsf_stat_dentry(dentry, NULL) == -ENOENT; +} + +const struct dentry_operations vboxsf_dentry_ops = { + .d_revalidate = vboxsf_dentry_revalidate +}; + +/* iops */ + +static struct dentry *vboxsf_dir_lookup(struct inode *parent, + struct dentry *dentry, + unsigned int flags) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct shfl_fsobjinfo fsinfo; + struct inode *inode; + int err; + + dentry->d_time = jiffies; + + err = vboxsf_stat_dentry(dentry, &fsinfo); + if (err) { + inode = (err == -ENOENT) ? NULL : ERR_PTR(err); + } else { + inode = vboxsf_new_inode(parent->i_sb); + if (!IS_ERR(inode)) + vboxsf_init_inode(sbi, inode, &fsinfo); + } + + return d_splice_alias(inode, dentry); +} + +static int vboxsf_dir_instantiate(struct inode *parent, struct dentry *dentry, + struct shfl_fsobjinfo *info) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct vboxsf_inode *sf_i; + struct inode *inode; + + inode = vboxsf_new_inode(parent->i_sb); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + sf_i = VBOXSF_I(inode); + /* The host may have given us different attr then requested */ + sf_i->force_restat = 1; + vboxsf_init_inode(sbi, inode, info); + + d_instantiate(dentry, inode); + + return 0; +} + +static int vboxsf_dir_create(struct inode *parent, struct dentry *dentry, + umode_t mode, int is_dir) +{ + struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent); + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct shfl_createparms params = {}; + int err; + + params.handle = SHFL_HANDLE_NIL; + params.create_flags = SHFL_CF_ACT_CREATE_IF_NEW | + SHFL_CF_ACT_FAIL_IF_EXISTS | + SHFL_CF_ACCESS_READWRITE | + (is_dir ? SHFL_CF_DIRECTORY : 0); + params.info.attr.mode = (mode & 0777) | + (is_dir ? 
SHFL_TYPE_DIRECTORY : SHFL_TYPE_FILE); + params.info.attr.additional = SHFLFSOBJATTRADD_NOTHING; + + err = vboxsf_create_at_dentry(dentry, &params); + if (err) + return err; + + if (params.result != SHFL_FILE_CREATED) + return -EPERM; + + vboxsf_close(sbi->root, params.handle); + + err = vboxsf_dir_instantiate(parent, dentry, &params.info); + if (err) + return err; + + /* parent directory access/change time changed */ + sf_parent_i->force_restat = 1; + + return 0; +} + +static int vboxsf_dir_mkfile(struct inode *parent, struct dentry *dentry, + umode_t mode, bool excl) +{ + return vboxsf_dir_create(parent, dentry, mode, 0); +} + +static int vboxsf_dir_mkdir(struct inode *parent, struct dentry *dentry, + umode_t mode) +{ + return vboxsf_dir_create(parent, dentry, mode, 1); +} + +static int vboxsf_dir_unlink(struct inode *parent, struct dentry *dentry) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent); + struct inode *inode = d_inode(dentry); + struct shfl_string *path; + u32 flags; + int err; + + if (S_ISDIR(inode->i_mode)) + flags = SHFL_REMOVE_DIR; + else + flags = SHFL_REMOVE_FILE; + + if (S_ISLNK(inode->i_mode)) + flags |= SHFL_REMOVE_SYMLINK; + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return PTR_ERR(path); + + err = vboxsf_remove(sbi->root, path, flags); + __putname(path); + if (err) + return err; + + /* parent directory access/change time changed */ + sf_parent_i->force_restat = 1; + + return 0; +} + +static int vboxsf_dir_rename(struct inode *old_parent, + struct dentry *old_dentry, + struct inode *new_parent, + struct dentry *new_dentry, + unsigned int flags) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(old_parent->i_sb); + struct vboxsf_inode *sf_old_parent_i = VBOXSF_I(old_parent); + struct vboxsf_inode *sf_new_parent_i = VBOXSF_I(new_parent); + u32 shfl_flags = SHFL_RENAME_FILE | SHFL_RENAME_REPLACE_IF_EXISTS; + struct shfl_string *old_path, *new_path; + int err; + + if (flags) +
return -EINVAL; + + old_path = vboxsf_path_from_dentry(sbi, old_dentry); + if (IS_ERR(old_path)) + return PTR_ERR(old_path); + + new_path = vboxsf_path_from_dentry(sbi, new_dentry); + if (IS_ERR(new_path)) { + err = PTR_ERR(new_path); + goto err_put_old_path; + } + + if (d_inode(old_dentry)->i_mode & S_IFDIR) + shfl_flags = 0; + + err = vboxsf_rename(sbi->root, old_path, new_path, shfl_flags); + if (err == 0) { + /* parent directories access/change time changed */ + sf_new_parent_i->force_restat = 1; + sf_old_parent_i->force_restat = 1; + } + + __putname(new_path); +err_put_old_path: + __putname(old_path); + return err; +} + +static int vboxsf_dir_symlink(struct inode *parent, struct dentry *dentry, + const char *symname) +{ + struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent); + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + int symname_size = strlen(symname) + 1; + struct shfl_string *path, *ssymname; + struct shfl_fsobjinfo info; + int err; + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return PTR_ERR(path); + + ssymname = kmalloc(SHFLSTRING_HEADER_SIZE + symname_size, GFP_KERNEL); + if (!ssymname) { + __putname(path); + return -ENOMEM; + } + ssymname->length = symname_size - 1; + ssymname->size = symname_size; + memcpy(ssymname->string.utf8, symname, symname_size); + + err = vboxsf_symlink(sbi->root, path, ssymname, &info); + kfree(ssymname); + __putname(path); + if (err) { + /* -EROFS means symlinks are note support -> -EPERM */ + return (err == -EROFS) ? 
-EPERM : err; + } + + err = vboxsf_dir_instantiate(parent, dentry, &info); + if (err) + return err; + + /* parent directory access/change time changed */ + sf_parent_i->force_restat = 1; + return 0; +} + +const struct inode_operations vboxsf_dir_iops = { + .lookup = vboxsf_dir_lookup, + .create = vboxsf_dir_mkfile, + .mkdir = vboxsf_dir_mkdir, + .rmdir = vboxsf_dir_unlink, + .unlink = vboxsf_dir_unlink, + .rename = vboxsf_dir_rename, + .symlink = vboxsf_dir_symlink, + .getattr = vboxsf_getattr, + .setattr = vboxsf_setattr, +}; diff --git a/drivers/staging/vboxsf/file.c b/drivers/staging/vboxsf/file.c new file mode 100644 index 000000000000..4b61ccf83fca --- /dev/null +++ b/drivers/staging/vboxsf/file.c @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: MIT +/* + * VirtualBox Guest Shared Folders support: Regular file inode and file ops. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include +#include +#include +#include +#include +#include "vfsmod.h" + +struct vboxsf_handle { + u64 handle; + u32 root; + u32 access_flags; + struct kref refcount; + struct list_head head; +}; + +static int vboxsf_file_open(struct inode *inode, struct file *file) +{ + struct vboxsf_inode *sf_i = VBOXSF_I(inode); + struct shfl_createparms params = {}; + struct vboxsf_handle *sf_handle; + u32 access_flags = 0; + int err; + + sf_handle = kmalloc(sizeof(*sf_handle), GFP_KERNEL); + if (!sf_handle) + return -ENOMEM; + + /* + * We check the value of params.handle afterwards to find out if + * the call succeeded or failed, as the API does not seem to cleanly + * distinguish error and informational messages. + * + * Furthermore, we must set params.handle to SHFL_HANDLE_NIL to + * make the shared folders host service use our mode parameter. + */ + params.handle = SHFL_HANDLE_NIL; + if (file->f_flags & O_CREAT) { + params.create_flags |= SHFL_CF_ACT_CREATE_IF_NEW; + /* + * We ignore O_EXCL, as the Linux kernel seems to call create + * beforehand itself, so O_EXCL should always fail. 
+ */ + if (file->f_flags & O_TRUNC) + params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS; + else + params.create_flags |= SHFL_CF_ACT_OPEN_IF_EXISTS; + } else { + params.create_flags |= SHFL_CF_ACT_FAIL_IF_NEW; + if (file->f_flags & O_TRUNC) + params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS; + } + + switch (file->f_flags & O_ACCMODE) { + case O_RDONLY: + access_flags |= SHFL_CF_ACCESS_READ; + break; + + case O_WRONLY: + access_flags |= SHFL_CF_ACCESS_WRITE; + break; + + case O_RDWR: + access_flags |= SHFL_CF_ACCESS_READWRITE; + break; + + default: + WARN_ON(1); + } + + if (file->f_flags & O_APPEND) + access_flags |= SHFL_CF_ACCESS_APPEND; + + params.create_flags |= access_flags; + params.info.attr.mode = inode->i_mode; + + err = vboxsf_create_at_dentry(file_dentry(file), &params); + if (err == 0 && params.handle == SHFL_HANDLE_NIL) + err = (params.result == SHFL_FILE_EXISTS) ? -EEXIST : -ENOENT; + if (err) { + kfree(sf_handle); + return err; + } + + /* the host may have given us different attr then requested */ + sf_i->force_restat = 1; + + /* init our handle struct and add it to the inode's handles list */ + sf_handle->handle = params.handle; + sf_handle->root = VBOXSF_SBI(inode->i_sb)->root; + sf_handle->access_flags = access_flags; + kref_init(&sf_handle->refcount); + + mutex_lock(&sf_i->handle_list_mutex); + list_add(&sf_handle->head, &sf_i->handle_list); + mutex_unlock(&sf_i->handle_list_mutex); + + file->private_data = sf_handle; + return 0; +} + +static void vboxsf_handle_release(struct kref *refcount) +{ + struct vboxsf_handle *sf_handle = + container_of(refcount, struct vboxsf_handle, refcount); + + vboxsf_close(sf_handle->root, sf_handle->handle); + kfree(sf_handle); +} + +static int vboxsf_file_release(struct inode *inode, struct file *file) +{ + struct vboxsf_inode *sf_i = VBOXSF_I(inode); + struct vboxsf_handle *sf_handle = file->private_data; + + /* + * When a file is closed on our (the guest) side, we want any subsequent + * accesses done on
the host side to see all changes done from our side. + */ + filemap_write_and_wait(inode->i_mapping); + + mutex_lock(&sf_i->handle_list_mutex); + list_del(&sf_handle->head); + mutex_unlock(&sf_i->handle_list_mutex); + + kref_put(&sf_handle->refcount, vboxsf_handle_release); + return 0; +} + +/* + * Write back dirty pages now, because there may not be any suitable + * open files later + */ +static void vboxsf_vma_close(struct vm_area_struct *vma) +{ + filemap_write_and_wait(vma->vm_file->f_mapping); +} + +static const struct vm_operations_struct vboxsf_file_vm_ops = { + .close = vboxsf_vma_close, + .fault = filemap_fault, + .map_pages = filemap_map_pages, +}; + +static int vboxsf_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + int err; + + err = generic_file_mmap(file, vma); + if (!err) + vma->vm_ops = &vboxsf_file_vm_ops; + + return err; +} + +/* + * Note that since we are accessing files on the host's filesystem, files + * may always be changed underneath us by the host! + * + * The vboxsf API between the guest and the host does not offer any functions + * to deal with this. There is no inode-generation to check for changes, no + * events / callback on changes and no way to lock files. + * + * To avoid returning stale data when a file gets *opened* on our (the guest) + * side, we do a "stat" on the host side, then compare the mtime with the + * last known mtime and invalidate the page-cache if they differ. + * This is done from vboxsf_inode_revalidate(). + * + * When reads are done through the read_iter fop, it is possible to do + * further cache revalidation then, there are 3 options to deal with this: + * + * 1) Rely solely on the revalidation done at open time + * 2) Do another "stat" and compare mtime again. Unfortunately the vboxsf + * host API does not allow stat on handles, so we would need to use + * file->f_path.dentry and the stat will then fail if the file was unlinked + * or renamed (and there is no thing like NFS' silly-rename). 
So we get: + * 2a) "stat" and compare mtime, on stat failure invalidate the cache + * 2b) "stat" and compare mtime, on stat failure do nothing + * 3) Simply always call invalidate_inode_pages2_range on the range of the read + * + * Currently we are keeping things KISS and using option 1. this allows + * directly using generic_file_read_iter without wrapping it. + * + * This means that only data written on the host side before open() on + * the guest side is guaranteed to be seen by the guest. If necessary + * we may provide other read-cache strategies in the future and make this + * configurable through a mount option. + */ +const struct file_operations vboxsf_reg_fops = { + .llseek = generic_file_llseek, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, + .mmap = vboxsf_file_mmap, + .open = vboxsf_file_open, + .release = vboxsf_file_release, + .fsync = noop_fsync, + .splice_read = generic_file_splice_read, +}; + +const struct inode_operations vboxsf_reg_iops = { + .getattr = vboxsf_getattr, + .setattr = vboxsf_setattr +}; + +static int vboxsf_readpage(struct file *file, struct page *page) +{ + struct vboxsf_handle *sf_handle = file->private_data; + loff_t off = page_offset(page); + u32 nread = PAGE_SIZE; + u8 *buf; + int err; + + buf = kmap(page); + + err = vboxsf_read(sf_handle->root, sf_handle->handle, off, &nread, buf); + if (err == 0) { + memset(&buf[nread], 0, PAGE_SIZE - nread); + flush_dcache_page(page); + SetPageUptodate(page); + } else { + SetPageError(page); + } + + kunmap(page); + unlock_page(page); + return err; +} + +static struct vboxsf_handle *vboxsf_get_write_handle(struct vboxsf_inode *sf_i) +{ + struct vboxsf_handle *h, *sf_handle = NULL; + + mutex_lock(&sf_i->handle_list_mutex); + list_for_each_entry(h, &sf_i->handle_list, head) { + if (h->access_flags == SHFL_CF_ACCESS_WRITE || + h->access_flags == SHFL_CF_ACCESS_READWRITE) { + kref_get(&h->refcount); + sf_handle = h; + break; + } + } + 
mutex_unlock(&sf_i->handle_list_mutex); + + return sf_handle; +} + +static int vboxsf_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct vboxsf_inode *sf_i = VBOXSF_I(inode); + struct vboxsf_handle *sf_handle; + loff_t off = page_offset(page); + loff_t size = i_size_read(inode); + u32 nwrite = PAGE_SIZE; + u8 *buf; + int err; + + if (off + PAGE_SIZE > size) + nwrite = size & ~PAGE_MASK; + + sf_handle = vboxsf_get_write_handle(sf_i); + if (!sf_handle) + return -EBADF; + + buf = kmap(page); + err = vboxsf_write(sf_handle->root, sf_handle->handle, + off, &nwrite, buf); + kunmap(page); + + kref_put(&sf_handle->refcount, vboxsf_handle_release); + + if (err == 0) { + ClearPageError(page); + /* mtime changed */ + sf_i->force_restat = 1; + } else { + ClearPageUptodate(page); + } + + unlock_page(page); + return err; +} + +static int vboxsf_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned int len, unsigned int copied, + struct page *page, void *fsdata) +{ + struct inode *inode = mapping->host; + struct vboxsf_handle *sf_handle = file->private_data; + unsigned int from = pos & ~PAGE_MASK; + u32 nwritten = len; + u8 *buf; + int err; + + buf = kmap(page); + err = vboxsf_write(sf_handle->root, sf_handle->handle, + pos, &nwritten, buf + from); + kunmap(page); + + if (err) { + nwritten = 0; + goto out; + } + + /* mtime changed */ + VBOXSF_I(inode)->force_restat = 1; + + if (!PageUptodate(page) && nwritten == PAGE_SIZE) + SetPageUptodate(page); + + pos += nwritten; + if (pos > inode->i_size) + i_size_write(inode, pos); + +out: + unlock_page(page); + put_page(page); + + return nwritten; +} + +const struct address_space_operations vboxsf_reg_aops = { + .readpage = vboxsf_readpage, + .writepage = vboxsf_writepage, + .set_page_dirty = __set_page_dirty_nobuffers, + .write_begin = simple_write_begin, + .write_end = vboxsf_write_end, +}; + +static const char *vboxsf_get_link(struct dentry 
*dentry, struct inode *inode, + struct delayed_call *done) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb); + struct shfl_string *path; + char *link; + int err; + + if (!dentry) + return ERR_PTR(-ECHILD); + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return (char *)path; + + link = kzalloc(PATH_MAX, GFP_KERNEL); + if (!link) { + __putname(path); + return ERR_PTR(-ENOMEM); + } + + err = vboxsf_readlink(sbi->root, path, PATH_MAX, link); + __putname(path); + if (err) { + kfree(link); + return ERR_PTR(err); + } + + set_delayed_call(done, kfree_link, link); + return link; +} + +const struct inode_operations vboxsf_lnk_iops = { + .get_link = vboxsf_get_link +}; diff --git a/drivers/staging/vboxsf/shfl_hostintf.h b/drivers/staging/vboxsf/shfl_hostintf.h new file mode 100644 index 000000000000..aca829062c12 --- /dev/null +++ b/drivers/staging/vboxsf/shfl_hostintf.h @@ -0,0 +1,901 @@ +/* SPDX-License-Identifier: MIT */ +/* + * VirtualBox Shared Folders: host interface definition. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#ifndef SHFL_HOSTINTF_H +#define SHFL_HOSTINTF_H + +#include + +/* The max in/out buffer size for a FN_READ or FN_WRITE call */ +#define SHFL_MAX_RW_COUNT (16 * SZ_1M) + +/* + * Structures shared between guest and the service + * can be relocated and use offsets to point to variable + * length parts. + * + * Shared folders protocol works with handles. + * Before doing any action on a file system object, + * one have to obtain the object handle via a SHFL_FN_CREATE + * request. A handle must be closed with SHFL_FN_CLOSE. + */ + +enum { + SHFL_FN_QUERY_MAPPINGS = 1, /* Query mappings changes. */ + SHFL_FN_QUERY_MAP_NAME = 2, /* Query map name. */ + SHFL_FN_CREATE = 3, /* Open/create object. */ + SHFL_FN_CLOSE = 4, /* Close object handle. */ + SHFL_FN_READ = 5, /* Read object content. */ + SHFL_FN_WRITE = 6, /* Write new object content. */ + SHFL_FN_LOCK = 7, /* Lock/unlock a range in the object. 
*/ + SHFL_FN_LIST = 8, /* List object content. */ + SHFL_FN_INFORMATION = 9, /* Query/set object information. */ + /* Note function number 10 is not used! */ + SHFL_FN_REMOVE = 11, /* Remove object */ + SHFL_FN_MAP_FOLDER_OLD = 12, /* Map folder (legacy) */ + SHFL_FN_UNMAP_FOLDER = 13, /* Unmap folder */ + SHFL_FN_RENAME = 14, /* Rename object */ + SHFL_FN_FLUSH = 15, /* Flush file */ + SHFL_FN_SET_UTF8 = 16, /* Select UTF8 filename encoding */ + SHFL_FN_MAP_FOLDER = 17, /* Map folder */ + SHFL_FN_READLINK = 18, /* Read symlink dest (as of VBox 4.0) */ + SHFL_FN_SYMLINK = 19, /* Create symlink (as of VBox 4.0) */ + SHFL_FN_SET_SYMLINKS = 20, /* Ask host to show symlinks (4.0+) */ +}; + +/* Root handles for a mapping are of type u32, Root handles are unique. */ +#define SHFL_ROOT_NIL UINT_MAX + +/* Shared folders handle for an opened object are of type u64. */ +#define SHFL_HANDLE_NIL ULLONG_MAX + +/* Hardcoded maximum length (in chars) of a shared folder name. */ +#define SHFL_MAX_LEN (256) +/* Hardcoded maximum number of shared folder mapping available to the guest. */ +#define SHFL_MAX_MAPPINGS (64) + +/** Shared folder string buffer structure. */ +struct shfl_string { + /** Allocated size of the string member in bytes. */ + u16 size; + + /** Length of string without trailing nul in bytes. */ + u16 length; + + /** UTF-8 or UTF-16 string. Nul terminated. */ + union { + u8 utf8[2]; + u16 utf16[1]; + u16 ucs2[1]; /* misnomer, use utf16. */ + } string; +}; +VMMDEV_ASSERT_SIZE(shfl_string, 6); + +/* The size of shfl_string w/o the string part. */ +#define SHFLSTRING_HEADER_SIZE 4 + +/* Calculate size of the string. */ +static inline u32 shfl_string_buf_size(const struct shfl_string *string) +{ + return string ? SHFLSTRING_HEADER_SIZE + string->size : 0; +} + +/* Set user id on execution (S_ISUID). */ +#define SHFL_UNIX_ISUID 0004000U +/* Set group id on execution (S_ISGID). */ +#define SHFL_UNIX_ISGID 0002000U +/* Sticky bit (S_ISVTX / S_ISTXT). 
*/ +#define SHFL_UNIX_ISTXT 0001000U + +/* Owner readable (S_IRUSR). */ +#define SHFL_UNIX_IRUSR 0000400U +/* Owner writable (S_IWUSR). */ +#define SHFL_UNIX_IWUSR 0000200U +/* Owner executable (S_IXUSR). */ +#define SHFL_UNIX_IXUSR 0000100U + +/* Group readable (S_IRGRP). */ +#define SHFL_UNIX_IRGRP 0000040U +/* Group writable (S_IWGRP). */ +#define SHFL_UNIX_IWGRP 0000020U +/* Group executable (S_IXGRP). */ +#define SHFL_UNIX_IXGRP 0000010U + +/* Other readable (S_IROTH). */ +#define SHFL_UNIX_IROTH 0000004U +/* Other writable (S_IWOTH). */ +#define SHFL_UNIX_IWOTH 0000002U +/* Other executable (S_IXOTH). */ +#define SHFL_UNIX_IXOTH 0000001U + +/* Named pipe (fifo) (S_IFIFO). */ +#define SHFL_TYPE_FIFO 0010000U +/* Character device (S_IFCHR). */ +#define SHFL_TYPE_DEV_CHAR 0020000U +/* Directory (S_IFDIR). */ +#define SHFL_TYPE_DIRECTORY 0040000U +/* Block device (S_IFBLK). */ +#define SHFL_TYPE_DEV_BLOCK 0060000U +/* Regular file (S_IFREG). */ +#define SHFL_TYPE_FILE 0100000U +/* Symbolic link (S_IFLNK). */ +#define SHFL_TYPE_SYMLINK 0120000U +/* Socket (S_IFSOCK). */ +#define SHFL_TYPE_SOCKET 0140000U +/* Whiteout (S_IFWHT). */ +#define SHFL_TYPE_WHITEOUT 0160000U +/* Type mask (S_IFMT). */ +#define SHFL_TYPE_MASK 0170000U + +/* Checks the mode flags indicate a directory (S_ISDIR). */ +#define SHFL_IS_DIRECTORY(m) (((m) & SHFL_TYPE_MASK) == SHFL_TYPE_DIRECTORY) +/* Checks the mode flags indicate a symbolic link (S_ISLNK). */ +#define SHFL_IS_SYMLINK(m) (((m) & SHFL_TYPE_MASK) == SHFL_TYPE_SYMLINK) + +/** The available additional information in a shfl_fsobjattr object. */ +enum shfl_fsobjattr_add { + /** No additional information is available / requested. */ + SHFLFSOBJATTRADD_NOTHING = 1, + /** + * The additional unix attributes (shfl_fsobjattr::u::unix_attr) are + * available / requested. + */ + SHFLFSOBJATTRADD_UNIX, + /** + * The additional extended attribute size (shfl_fsobjattr::u::size) is + * available / requested. 
+ */ + SHFLFSOBJATTRADD_EASIZE, + /** + * The last valid item (inclusive). + * The valid range is SHFLFSOBJATTRADD_NOTHING thru + * SHFLFSOBJATTRADD_LAST. + */ + SHFLFSOBJATTRADD_LAST = SHFLFSOBJATTRADD_EASIZE, + + /** The usual 32-bit hack. */ + SHFLFSOBJATTRADD_32BIT_SIZE_HACK = 0x7fffffff +}; + +/** + * Additional unix Attributes, these are available when + * shfl_fsobjattr.additional == SHFLFSOBJATTRADD_UNIX. + */ +struct shfl_fsobjattr_unix { + /** + * The user owning the filesystem object (st_uid). + * This field is ~0U if not supported. + */ + u32 uid; + + /** + * The group the filesystem object is assigned (st_gid). + * This field is ~0U if not supported. + */ + u32 gid; + + /** + * Number of hard links to this filesystem object (st_nlink). + * This field is 1 if the filesystem doesn't support hardlinking or + * the information isn't available. + */ + u32 hardlinks; + + /** + * The device number of the device which this filesystem object resides + * on (st_dev). This field is 0 if this information is not available. + */ + u32 inode_id_device; + + /** + * The unique identifier (within the filesystem) of this filesystem + * object (st_ino). Together with inode_id_device, this field can be + * used as an OS wide unique id, when both their values are not 0. + * This field is 0 if the information is not available. + */ + u64 inode_id; + + /** + * User flags (st_flags). + * This field is 0 if this information is not available. + */ + u32 flags; + + /** + * The current generation number (st_gen). + * This field is 0 if this information is not available. + */ + u32 generation_id; + + /** + * The device number of a char. or block device type object (st_rdev). + * This field is 0 if the file isn't a char. or block device or when + * the OS doesn't use the major+minor device identification scheme. + */ + u32 device; +} __packed; + +/** Extended attribute size. */ +struct shfl_fsobjattr_easize { + /** Size of EAs.
+ */ + s64 cb; +} __packed; + +/** Shared folder filesystem object attributes. */ +struct shfl_fsobjattr { + /** Mode flags (st_mode). SHFL_UNIX_*, SHFL_TYPE_*, and SHFL_DOS_*. */ + u32 mode; + + /** The additional attributes available. */ + enum shfl_fsobjattr_add additional; + + /** + * Additional attributes. + * + * Unless explicitly specified to an API, the API can provide additional + * data as it is provided by the underlying OS. + */ + union { + struct shfl_fsobjattr_unix unix_attr; + struct shfl_fsobjattr_easize size; + } __packed u; +} __packed; +VMMDEV_ASSERT_SIZE(shfl_fsobjattr, 44); + +struct shfl_timespec { + s64 ns_relative_to_unix_epoch; +}; + +/** Filesystem object information structure. */ +struct shfl_fsobjinfo { + /** + * Logical size (st_size). + * For normal files this is the size of the file. + * For symbolic links, this is the length of the path name contained + * in the symbolic link. + * For other objects this field needs to be specified. + */ + s64 size; + + /** Disk allocation size (st_blocks * DEV_BSIZE). */ + s64 allocated; + + /** Time of last access (st_atime). */ + struct shfl_timespec access_time; + + /** Time of last data modification (st_mtime). */ + struct shfl_timespec modification_time; + + /** + * Time of last status change (st_ctime). + * If not available this is set to modification_time. + */ + struct shfl_timespec change_time; + + /** + * Time of file birth (st_birthtime). + * If not available this is set to change_time. + */ + struct shfl_timespec birth_time; + + /** Attributes. */ + struct shfl_fsobjattr attr; + +} __packed; +VMMDEV_ASSERT_SIZE(shfl_fsobjinfo, 92); + +/** + * result of an open/create request. + * Along with handle value the result code + * identifies what has happened while + * trying to open the object. + */ +enum shfl_create_result { + SHFL_NO_RESULT, + /** Specified path does not exist. */ + SHFL_PATH_NOT_FOUND, + /** Path to file exists, but the last component does not.
*/ + SHFL_FILE_NOT_FOUND, + /** File already exists and either has been opened or not. */ + SHFL_FILE_EXISTS, + /** New file was created. */ + SHFL_FILE_CREATED, + /** Existing file was replaced or overwritten. */ + SHFL_FILE_REPLACED +}; + +/* No flags. Initialization value. */ +#define SHFL_CF_NONE (0x00000000) + +/* + * Only lookup the object, do not return a handle. When this is set all other + * flags are ignored. + */ +#define SHFL_CF_LOOKUP (0x00000001) + +/* + * Open parent directory of specified object. + * Useful for the corresponding Windows FSD flag + * and for opening paths like \\dir\\*.* to search the 'dir'. + */ +#define SHFL_CF_OPEN_TARGET_DIRECTORY (0x00000002) + +/* Create/open a directory. */ +#define SHFL_CF_DIRECTORY (0x00000004) + +/* + * Open/create action to do if object exists + * and if the object does not exists. + * REPLACE file means atomically DELETE and CREATE. + * OVERWRITE file means truncating the file to 0 and + * setting new size. + * When opening an existing directory REPLACE and OVERWRITE + * actions are considered invalid, and cause returning + * FILE_EXISTS with NIL handle. + */ +#define SHFL_CF_ACT_MASK_IF_EXISTS (0x000000f0) +#define SHFL_CF_ACT_MASK_IF_NEW (0x00000f00) + +/* What to do if object exists. */ +#define SHFL_CF_ACT_OPEN_IF_EXISTS (0x00000000) +#define SHFL_CF_ACT_FAIL_IF_EXISTS (0x00000010) +#define SHFL_CF_ACT_REPLACE_IF_EXISTS (0x00000020) +#define SHFL_CF_ACT_OVERWRITE_IF_EXISTS (0x00000030) + +/* What to do if object does not exist. */ +#define SHFL_CF_ACT_CREATE_IF_NEW (0x00000000) +#define SHFL_CF_ACT_FAIL_IF_NEW (0x00000100) + +/* Read/write requested access for the object. */ +#define SHFL_CF_ACCESS_MASK_RW (0x00003000) + +/* No access requested. */ +#define SHFL_CF_ACCESS_NONE (0x00000000) +/* Read access requested. */ +#define SHFL_CF_ACCESS_READ (0x00001000) +/* Write access requested. */ +#define SHFL_CF_ACCESS_WRITE (0x00002000) +/* Read/Write access requested. 
*/ +#define SHFL_CF_ACCESS_READWRITE (0x00003000) + +/* Requested share access for the object. */ +#define SHFL_CF_ACCESS_MASK_DENY (0x0000c000) + +/* Allow any access. */ +#define SHFL_CF_ACCESS_DENYNONE (0x00000000) +/* Do not allow read. */ +#define SHFL_CF_ACCESS_DENYREAD (0x00004000) +/* Do not allow write. */ +#define SHFL_CF_ACCESS_DENYWRITE (0x00008000) +/* Do not allow access. */ +#define SHFL_CF_ACCESS_DENYALL (0x0000c000) + +/* Requested access to attributes of the object. */ +#define SHFL_CF_ACCESS_MASK_ATTR (0x00030000) + +/* No access requested. */ +#define SHFL_CF_ACCESS_ATTR_NONE (0x00000000) +/* Read access requested. */ +#define SHFL_CF_ACCESS_ATTR_READ (0x00010000) +/* Write access requested. */ +#define SHFL_CF_ACCESS_ATTR_WRITE (0x00020000) +/* Read/Write access requested. */ +#define SHFL_CF_ACCESS_ATTR_READWRITE (0x00030000) + +/* + * The file is opened in append mode. + * Ignored if SHFL_CF_ACCESS_WRITE is not set. + */ +#define SHFL_CF_ACCESS_APPEND (0x00040000) + +/** Create parameters buffer struct for SHFL_FN_CREATE call */ +struct shfl_createparms { + /** Returned handle of opened object. */ + u64 handle; + + /** Returned result of the operation */ + enum shfl_create_result result; + + /** SHFL_CF_* */ + u32 create_flags; + + /** + * Attributes of object to create and + * returned actual attributes of opened/created object. + */ + struct shfl_fsobjinfo info; +} __packed; + +/** Shared Folder directory information */ +struct shfl_dirinfo { + /** Full information about the object. */ + struct shfl_fsobjinfo info; + /** + * The length of the short field (number of UTF16 chars). + * It is 16-bit for reasons of alignment. + */ + u16 short_name_len; + /** + * The short name for 8.3 compatibility. + * Empty string if not available. + */ + u16 short_name[14]; + struct shfl_string name; +}; + +/** Shared folder filesystem properties. */ +struct shfl_fsproperties { + /** + * The maximum size of a filesystem object name. 
+ * This does not include the '\\0'. + */ + u32 max_component_len; + + /** + * True if the filesystem is remote. + * False if the filesystem is local. + */ + bool remote; + + /** + * True if the filesystem is case sensitive. + * False if the filesystem is case insensitive. + */ + bool case_sensitive; + + /** + * True if the filesystem is mounted read only. + * False if the filesystem is mounted read write. + */ + bool read_only; + + /** + * True if the filesystem can encode unicode object names. + * False if it can't. + */ + bool supports_unicode; + + /** + * True if the filesystem is compressed. + * False if it isn't or we don't know. + */ + bool compressed; + + /** + * True if the filesystem compresses individual files. + * False if it doesn't or we don't know. + */ + bool file_compression; +}; +VMMDEV_ASSERT_SIZE(shfl_fsproperties, 12); + +struct shfl_volinfo { + s64 total_allocation_bytes; + s64 available_allocation_bytes; + u32 bytes_per_allocation_unit; + u32 bytes_per_sector; + u32 serial; + struct shfl_fsproperties properties; +}; + + +/** SHFL_FN_MAP_FOLDER Parameters structure. */ +struct shfl_map_folder { + /** + * pointer, in: + * Points to struct shfl_string buffer. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * pointer, out: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: UTF16 + * Path delimiter + */ + struct vmmdev_hgcm_function_parameter delimiter; + + /** + * pointer, in: SHFLROOT (u32) + * Case sensitive flag + */ + struct vmmdev_hgcm_function_parameter case_sensitive; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_MAP_FOLDER (4) + + +/** SHFL_FN_UNMAP_FOLDER Parameters structure. */ +struct shfl_unmap_folder { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried.
+ */ + struct vmmdev_hgcm_function_parameter root; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_UNMAP_FOLDER (1) + + +/** SHFL_FN_CREATE Parameters structure. */ +struct shfl_create { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string buffer. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * pointer, in/out: + * Points to struct shfl_createparms buffer. + */ + struct vmmdev_hgcm_function_parameter parms; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_CREATE (3) + + +/** SHFL_FN_CLOSE Parameters structure. */ +struct shfl_close { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to close. + */ + struct vmmdev_hgcm_function_parameter handle; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_CLOSE (2) + + +/** SHFL_FN_READ Parameters structure. */ +struct shfl_read { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to read from. + */ + struct vmmdev_hgcm_function_parameter handle; + + /** + * value64, in: + * Offset to read from. + */ + struct vmmdev_hgcm_function_parameter offset; + + /** + * value64, in/out: + * Bytes to read/How many were read. + */ + struct vmmdev_hgcm_function_parameter cb; + + /** + * pointer, out: + * Buffer to place data to. + */ + struct vmmdev_hgcm_function_parameter buffer; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_READ (5) + + +/** SHFL_FN_WRITE Parameters structure. */ +struct shfl_write { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. 
+ */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to write to. + */ + struct vmmdev_hgcm_function_parameter handle; + + /** + * value64, in: + * Offset to write to. + */ + struct vmmdev_hgcm_function_parameter offset; + + /** + * value64, in/out: + * Bytes to write/How many were written. + */ + struct vmmdev_hgcm_function_parameter cb; + + /** + * pointer, in: + * Data to write. + */ + struct vmmdev_hgcm_function_parameter buffer; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_WRITE (5) + + +/* + * SHFL_FN_LIST + * Listing information includes variable length RTDIRENTRY[EX] structures. + */ + +#define SHFL_LIST_NONE 0 +#define SHFL_LIST_RETURN_ONE 1 + +/** SHFL_FN_LIST Parameters structure. */ +struct shfl_list { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to be listed. + */ + struct vmmdev_hgcm_function_parameter handle; + + /** + * value32, in: + * List flags SHFL_LIST_*. + */ + struct vmmdev_hgcm_function_parameter flags; + + /** + * value32, in/out: + * Bytes to be used for listing information/How many bytes were used. + */ + struct vmmdev_hgcm_function_parameter cb; + + /** + * pointer, in/optional + * Points to struct shfl_string buffer that specifies a search path. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * pointer, out: + * Buffer to place listing information to. (struct shfl_dirinfo) + */ + struct vmmdev_hgcm_function_parameter buffer; + + /** + * value32, in/out: + * Indicates a key where the listing must be resumed. + * in: 0 means start from begin of object. + * out: 0 means listing completed. 
+ */ + struct vmmdev_hgcm_function_parameter resume_point; + + /** + * pointer, out: + * Number of files returned + */ + struct vmmdev_hgcm_function_parameter file_count; +}; + +/* Number of parameters */ +#define SHFL_CPARMS_LIST (8) + + +/** SHFL_FN_READLINK Parameters structure. */ +struct shfl_readLink { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string buffer. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * pointer, out: + * Buffer to place data to. + */ + struct vmmdev_hgcm_function_parameter buffer; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_READLINK (3) + + +/* SHFL_FN_INFORMATION */ + +/* Mask of Set/Get bit. */ +#define SHFL_INFO_MODE_MASK (0x1) +/* Get information */ +#define SHFL_INFO_GET (0x0) +/* Set information */ +#define SHFL_INFO_SET (0x1) + +/* Get name of the object. */ +#define SHFL_INFO_NAME (0x2) +/* Set size of object (extend/truncate); only applies to file objects */ +#define SHFL_INFO_SIZE (0x4) +/* Get/Set file object info. */ +#define SHFL_INFO_FILE (0x8) +/* Get volume information. */ +#define SHFL_INFO_VOLUME (0x10) + +/** SHFL_FN_INFORMATION Parameters structure. */ +struct shfl_information { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to be listed. + */ + struct vmmdev_hgcm_function_parameter handle; + + /** + * value32, in: + * SHFL_INFO_* + */ + struct vmmdev_hgcm_function_parameter flags; + + /** + * value32, in/out: + * Bytes to be used for information/How many bytes were used. + */ + struct vmmdev_hgcm_function_parameter cb; + + /** + * pointer, in/out: + * Information to be set/get (shfl_fsobjinfo or shfl_string).
Do not + * forget to set the shfl_fsobjinfo::attr::additional for a get + * operation as well. + */ + struct vmmdev_hgcm_function_parameter info; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_INFORMATION (5) + + +/* SHFL_FN_REMOVE */ + +#define SHFL_REMOVE_FILE (0x1) +#define SHFL_REMOVE_DIR (0x2) +#define SHFL_REMOVE_SYMLINK (0x4) + +/** SHFL_FN_REMOVE Parameters structure. */ +struct shfl_remove { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string buffer. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * value32, in: + * remove flags (file/directory) + */ + struct vmmdev_hgcm_function_parameter flags; + +}; + +#define SHFL_CPARMS_REMOVE (3) + + +/* SHFL_FN_RENAME */ + +#define SHFL_RENAME_FILE (0x1) +#define SHFL_RENAME_DIR (0x2) +#define SHFL_RENAME_REPLACE_IF_EXISTS (0x4) + +/** SHFL_FN_RENAME Parameters structure. */ +struct shfl_rename { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string src. + */ + struct vmmdev_hgcm_function_parameter src; + + /** + * pointer, in: + * Points to struct shfl_string dest. + */ + struct vmmdev_hgcm_function_parameter dest; + + /** + * value32, in: + * rename flags (file/directory) + */ + struct vmmdev_hgcm_function_parameter flags; + +}; + +#define SHFL_CPARMS_RENAME (4) + + +/** SHFL_FN_SYMLINK Parameters structure. */ +struct shfl_symlink { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string of path for the new symlink. + */ + struct vmmdev_hgcm_function_parameter new_path; + + /** + * pointer, in: + * Points to struct shfl_string of destination for symlink. 
+ */ + struct vmmdev_hgcm_function_parameter old_path; + + /** + * pointer, out: + * Information about created symlink. + */ + struct vmmdev_hgcm_function_parameter info; + +}; + +#define SHFL_CPARMS_SYMLINK (4) + +#endif diff --git a/drivers/staging/vboxsf/super.c b/drivers/staging/vboxsf/super.c new file mode 100644 index 000000000000..3913ffafa83b --- /dev/null +++ b/drivers/staging/vboxsf/super.c @@ -0,0 +1,499 @@ +// SPDX-License-Identifier: MIT +/* + * VirtualBox Guest Shared Folders support: Virtual File System. + * + * Module initialization/finalization + * File system registration/deregistration + * Superblock reading + * Few utility functions + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include "vfsmod.h" + +#define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */ + +#define VBSF_MOUNT_SIGNATURE_BYTE_0 ('\000') +#define VBSF_MOUNT_SIGNATURE_BYTE_1 ('\377') +#define VBSF_MOUNT_SIGNATURE_BYTE_2 ('\376') +#define VBSF_MOUNT_SIGNATURE_BYTE_3 ('\375') + +static int follow_symlinks; +module_param(follow_symlinks, int, 0444); +MODULE_PARM_DESC(follow_symlinks, + "Let host resolve symlinks rather than showing them"); + +static DEFINE_IDA(vboxsf_bdi_ida); +static DEFINE_MUTEX(vboxsf_setup_mutex); +static bool vboxsf_setup_done; +static struct super_operations vboxsf_super_ops; /* forward declaration */ +static struct kmem_cache *vboxsf_inode_cachep; + +static char * const vboxsf_default_nls = CONFIG_NLS_DEFAULT; + +enum { opt_nls, opt_uid, opt_gid, opt_ttl, opt_dmode, opt_fmode, + opt_dmask, opt_fmask }; + +static const struct fs_parameter_spec vboxsf_param_specs[] = { + fsparam_string ("nls", opt_nls), + fsparam_u32 ("uid", opt_uid), + fsparam_u32 ("gid", opt_gid), + fsparam_u32 ("ttl", opt_ttl), + fsparam_u32oct ("dmode", opt_dmode), + fsparam_u32oct ("fmode", opt_fmode), + fsparam_u32oct ("dmask", opt_dmask), + fsparam_u32oct ("fmask", opt_fmask), + {} +}; + +static 
const struct fs_parameter_description vboxsf_fs_parameters = { + .name = "vboxsf", + .specs = vboxsf_param_specs, +}; + +static int vboxsf_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct vboxsf_fs_context *ctx = fc->fs_private; + struct fs_parse_result result; + kuid_t uid; + kgid_t gid; + int opt; + + opt = fs_parse(fc, &vboxsf_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case opt_nls: + if (fc->purpose != FS_CONTEXT_FOR_MOUNT) { + vbg_err("vboxsf: Cannot reconfigure nls option\n"); + return -EINVAL; + } + ctx->nls_name = param->string; + param->string = NULL; + break; + case opt_uid: + uid = make_kuid(current_user_ns(), result.uint_32); + if (!uid_valid(uid)) + return -EINVAL; + ctx->o.uid = uid; + break; + case opt_gid: + gid = make_kgid(current_user_ns(), result.uint_32); + if (!gid_valid(gid)) + return -EINVAL; + ctx->o.gid = gid; + break; + case opt_ttl: + ctx->o.ttl = msecs_to_jiffies(result.uint_32); + break; + case opt_dmode: + if (result.uint_32 & ~0777) + return -EINVAL; + ctx->o.dmode = result.uint_32; + ctx->o.dmode_set = true; + break; + case opt_fmode: + if (result.uint_32 & ~0777) + return -EINVAL; + ctx->o.fmode = result.uint_32; + ctx->o.fmode_set = true; + break; + case opt_dmask: + if (result.uint_32 & ~07777) + return -EINVAL; + ctx->o.dmask = result.uint_32; + break; + case opt_fmask: + if (result.uint_32 & ~07777) + return -EINVAL; + ctx->o.fmask = result.uint_32; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) +{ + struct vboxsf_fs_context *ctx = fc->fs_private; + struct shfl_string *folder_name, root_path; + struct vboxsf_sbi *sbi; + struct dentry *droot; + struct inode *iroot; + char *nls_name; + size_t size; + int err; + + if (!fc->source) + return -EINVAL; + + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + sbi->o = ctx->o; + idr_init(&sbi->ino_idr); + 
spin_lock_init(&sbi->ino_idr_lock); + sbi->next_generation = 1; + sbi->bdi_id = -1; + + /* Load nls if not utf8 */ + nls_name = ctx->nls_name ? ctx->nls_name : vboxsf_default_nls; + if (strcmp(nls_name, "utf8") != 0) { + if (nls_name == vboxsf_default_nls) + sbi->nls = load_nls_default(); + else + sbi->nls = load_nls(nls_name); + + if (!sbi->nls) { + vbg_err("vboxsf: Count not load '%s' nls\n", nls_name); + err = -EINVAL; + goto fail_free; + } + } + + sbi->bdi_id = ida_simple_get(&vboxsf_bdi_ida, 0, 0, GFP_KERNEL); + if (sbi->bdi_id < 0) { + err = sbi->bdi_id; + goto fail_free; + } + + err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id); + if (err) + goto fail_free; + + /* Turn source into a shfl_string and map the folder */ + size = strlen(fc->source) + 1; + folder_name = kmalloc(SHFLSTRING_HEADER_SIZE + size, GFP_KERNEL); + if (!folder_name) + goto fail_free; + folder_name->size = size; + folder_name->length = size - 1; + strlcpy(folder_name->string.utf8, fc->source, size); + err = vboxsf_map_folder(folder_name, &sbi->root); + kfree(folder_name); + if (err) { + vbg_err("vboxsf: Host rejected mount of '%s' with error %d\n", + fc->source, err); + goto fail_free; + } + + root_path.length = 1; + root_path.size = 2; + root_path.string.utf8[0] = '/'; + root_path.string.utf8[1] = 0; + err = vboxsf_stat(sbi, &root_path, &sbi->root_info); + if (err) + goto fail_unmap; + + sb->s_magic = VBOXSF_SUPER_MAGIC; + sb->s_blocksize = 1024; + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_op = &vboxsf_super_ops; + sb->s_d_op = &vboxsf_dentry_ops; + + iroot = iget_locked(sb, 0); + if (!iroot) { + err = -ENOMEM; + goto fail_unmap; + } + vboxsf_init_inode(sbi, iroot, &sbi->root_info); + unlock_new_inode(iroot); + + droot = d_make_root(iroot); + if (!droot) { + err = -ENOMEM; + goto fail_unmap; + } + + sb->s_root = droot; + sb->s_fs_info = sbi; + return 0; + +fail_unmap: + vboxsf_unmap_folder(sbi->root); +fail_free: + if (sbi->bdi_id >= 0) + 
ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); + if (sbi->nls) + unload_nls(sbi->nls); + idr_destroy(&sbi->ino_idr); + kfree(sbi); + return err; +} + +static void vboxsf_inode_init_once(void *data) +{ + struct vboxsf_inode *sf_i = data; + + mutex_init(&sf_i->handle_list_mutex); + inode_init_once(&sf_i->vfs_inode); +} + +static struct inode *vboxsf_alloc_inode(struct super_block *sb) +{ + struct vboxsf_inode *sf_i; + + sf_i = kmem_cache_alloc(vboxsf_inode_cachep, GFP_NOFS); + if (!sf_i) + return NULL; + + sf_i->force_restat = 0; + INIT_LIST_HEAD(&sf_i->handle_list); + + return &sf_i->vfs_inode; +} + +static void vboxsf_free_inode(struct inode *inode) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb); + unsigned long flags; + + spin_lock_irqsave(&sbi->ino_idr_lock, flags); + idr_remove(&sbi->ino_idr, inode->i_ino); + spin_unlock_irqrestore(&sbi->ino_idr_lock, flags); + kmem_cache_free(vboxsf_inode_cachep, VBOXSF_I(inode)); +} + +static void vboxsf_put_super(struct super_block *sb) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(sb); + + vboxsf_unmap_folder(sbi->root); + if (sbi->bdi_id >= 0) + ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); + if (sbi->nls) + unload_nls(sbi->nls); + + /* + * vboxsf_free_inode uses the idr, make sure all delayed rcu free + * inodes are flushed. 
+ */ + rcu_barrier(); + idr_destroy(&sbi->ino_idr); + kfree(sbi); +} + +static int vboxsf_statfs(struct dentry *dentry, struct kstatfs *stat) +{ + struct super_block *sb = dentry->d_sb; + struct shfl_volinfo shfl_volinfo; + struct vboxsf_sbi *sbi; + u32 buf_len; + int err; + + sbi = VBOXSF_SBI(sb); + buf_len = sizeof(shfl_volinfo); + err = vboxsf_fsinfo(sbi->root, 0, SHFL_INFO_GET | SHFL_INFO_VOLUME, + &buf_len, &shfl_volinfo); + if (err) + return err; + + stat->f_type = VBOXSF_SUPER_MAGIC; + stat->f_bsize = shfl_volinfo.bytes_per_allocation_unit; + + do_div(shfl_volinfo.total_allocation_bytes, + shfl_volinfo.bytes_per_allocation_unit); + stat->f_blocks = shfl_volinfo.total_allocation_bytes; + + do_div(shfl_volinfo.available_allocation_bytes, + shfl_volinfo.bytes_per_allocation_unit); + stat->f_bfree = shfl_volinfo.available_allocation_bytes; + stat->f_bavail = shfl_volinfo.available_allocation_bytes; + + stat->f_files = 1000; + /* + * Don't return 0 here since the guest may then think that it is not + * possible to create any more files. 
+ */ + stat->f_ffree = 1000000; + stat->f_fsid.val[0] = 0; + stat->f_fsid.val[1] = 0; + stat->f_namelen = 255; + return 0; +} + +static struct super_operations vboxsf_super_ops = { + .alloc_inode = vboxsf_alloc_inode, + .free_inode = vboxsf_free_inode, + .put_super = vboxsf_put_super, + .statfs = vboxsf_statfs, +}; + +static int vboxsf_setup(void) +{ + int err; + + mutex_lock(&vboxsf_setup_mutex); + + if (vboxsf_setup_done) + goto success; + + vboxsf_inode_cachep = + kmem_cache_create("vboxsf_inode_cache", + sizeof(struct vboxsf_inode), 0, + (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | + SLAB_ACCOUNT), + vboxsf_inode_init_once); + if (!vboxsf_inode_cachep) { + err = -ENOMEM; + goto fail_nomem; + } + + err = vboxsf_connect(); + if (err) { + vbg_err("vboxsf: err %d connecting to guest PCI-device\n", err); + vbg_err("vboxsf: make sure you are inside a VirtualBox VM\n"); + vbg_err("vboxsf: and check dmesg for vboxguest errors\n"); + goto fail_free_cache; + } + + err = vboxsf_set_utf8(); + if (err) { + vbg_err("vboxsf_setutf8 error %d\n", err); + goto fail_disconnect; + } + + if (!follow_symlinks) { + err = vboxsf_set_symlinks(); + if (err) + vbg_warn("vboxsf: Unable to show symlinks: %d\n", err); + } + + vboxsf_setup_done = true; +success: + mutex_unlock(&vboxsf_setup_mutex); + return 0; + +fail_disconnect: + vboxsf_disconnect(); +fail_free_cache: + kmem_cache_destroy(vboxsf_inode_cachep); +fail_nomem: + mutex_unlock(&vboxsf_setup_mutex); + return err; +} + +static int vboxsf_parse_monolithic(struct fs_context *fc, void *data) +{ + char *options = data; + + if (options && options[0] == VBSF_MOUNT_SIGNATURE_BYTE_0 && + options[1] == VBSF_MOUNT_SIGNATURE_BYTE_1 && + options[2] == VBSF_MOUNT_SIGNATURE_BYTE_2 && + options[3] == VBSF_MOUNT_SIGNATURE_BYTE_3) { + vbg_err("vboxsf: Old binary mount data not supported, remove obsolete mount.vboxsf and/or update your VBoxService.\n"); + return -EINVAL; + } + + return generic_parse_monolithic(fc, data); +} + +static int 
vboxsf_get_tree(struct fs_context *fc) +{ + int err; + + err = vboxsf_setup(); + if (err) + return err; + + return vfs_get_super(fc, vfs_get_independent_super, vboxsf_fill_super); +} + +static int vboxsf_reconfigure(struct fs_context *fc) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(fc->root->d_sb); + struct vboxsf_fs_context *ctx = fc->fs_private; + struct inode *iroot; + + iroot = ilookup(fc->root->d_sb, 0); + if (!iroot) + return -ENOENT; + + /* Apply changed options to the root inode */ + sbi->o = ctx->o; + vboxsf_init_inode(sbi, iroot, &sbi->root_info); + + return 0; +} + +static void vboxsf_free_fc(struct fs_context *fc) +{ + struct vboxsf_fs_context *ctx = fc->fs_private; + + kfree(ctx->nls_name); + kfree(ctx); +} + +static const struct fs_context_operations vboxsf_context_ops = { + .free = vboxsf_free_fc, + .parse_param = vboxsf_parse_param, + .parse_monolithic = vboxsf_parse_monolithic, + .get_tree = vboxsf_get_tree, + .reconfigure = vboxsf_reconfigure, +}; + +static int vboxsf_init_fs_context(struct fs_context *fc) +{ + struct vboxsf_fs_context *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + current_uid_gid(&ctx->o.uid, &ctx->o.gid); + + fc->fs_private = ctx; + fc->ops = &vboxsf_context_ops; + return 0; +} + +static struct file_system_type vboxsf_fs_type = { + .owner = THIS_MODULE, + .name = "vboxsf", + .init_fs_context = vboxsf_init_fs_context, + .parameters = &vboxsf_fs_parameters, + .kill_sb = kill_anon_super +}; + +/* Module initialization/finalization handlers */ +static int __init vboxsf_init(void) +{ + return register_filesystem(&vboxsf_fs_type); +} + +static void __exit vboxsf_fini(void) +{ + unregister_filesystem(&vboxsf_fs_type); + + mutex_lock(&vboxsf_setup_mutex); + if (vboxsf_setup_done) { + vboxsf_disconnect(); + /* + * Make sure all delayed rcu free inodes are flushed + * before we destroy the cache. 
+ */ + rcu_barrier(); + kmem_cache_destroy(vboxsf_inode_cachep); + } + mutex_unlock(&vboxsf_setup_mutex); +} + +module_init(vboxsf_init); +module_exit(vboxsf_fini); + +MODULE_DESCRIPTION("Oracle VM VirtualBox Module for Host File System Access"); +MODULE_AUTHOR("Oracle Corporation"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_FS("vboxsf"); diff --git a/drivers/staging/vboxsf/utils.c b/drivers/staging/vboxsf/utils.c new file mode 100644 index 000000000000..1870b69c824e --- /dev/null +++ b/drivers/staging/vboxsf/utils.c @@ -0,0 +1,550 @@ +// SPDX-License-Identifier: MIT +/* + * VirtualBox Guest Shared Folders support: Utility functions. + * Mainly conversion from/to VirtualBox/Linux data structures. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include +#include +#include +#include +#include "vfsmod.h" + +struct inode *vboxsf_new_inode(struct super_block *sb) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(sb); + struct inode *inode; + unsigned long flags; + int cursor, ret; + u32 gen; + + inode = new_inode(sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + idr_preload(GFP_KERNEL); + spin_lock_irqsave(&sbi->ino_idr_lock, flags); + cursor = idr_get_cursor(&sbi->ino_idr); + ret = idr_alloc_cyclic(&sbi->ino_idr, inode, 1, 0, GFP_ATOMIC); + if (ret >= 0 && ret < cursor) + sbi->next_generation++; + gen = sbi->next_generation; + spin_unlock_irqrestore(&sbi->ino_idr_lock, flags); + idr_preload_end(); + + if (ret < 0) { + iput(inode); + return ERR_PTR(ret); + } + + inode->i_ino = ret; + inode->i_generation = gen; + return inode; +} + +/* set [inode] attributes based on [info], uid/gid based on [sbi] */ +void vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode, + const struct shfl_fsobjinfo *info) +{ + const struct shfl_fsobjattr *attr; + s64 allocated; + int mode; + + attr = &info->attr; + +#define mode_set(r) ((attr->mode & (SHFL_UNIX_##r)) ? 
(S_##r) : 0) + + mode = mode_set(IRUSR); + mode |= mode_set(IWUSR); + mode |= mode_set(IXUSR); + + mode |= mode_set(IRGRP); + mode |= mode_set(IWGRP); + mode |= mode_set(IXGRP); + + mode |= mode_set(IROTH); + mode |= mode_set(IWOTH); + mode |= mode_set(IXOTH); + +#undef mode_set + + /* We use the host-side values for these */ + inode->i_flags |= S_NOATIME | S_NOCMTIME; + inode->i_mapping->a_ops = &vboxsf_reg_aops; + + if (SHFL_IS_DIRECTORY(attr->mode)) { + inode->i_mode = sbi->o.dmode_set ? sbi->o.dmode : mode; + inode->i_mode &= ~sbi->o.dmask; + inode->i_mode |= S_IFDIR; + inode->i_op = &vboxsf_dir_iops; + inode->i_fop = &vboxsf_dir_fops; + /* + * XXX: this probably should be set to the number of entries + * in the directory plus two (. ..) + */ + set_nlink(inode, 1); + } else if (SHFL_IS_SYMLINK(attr->mode)) { + inode->i_mode = sbi->o.fmode_set ? sbi->o.fmode : mode; + inode->i_mode &= ~sbi->o.fmask; + inode->i_mode |= S_IFLNK; + inode->i_op = &vboxsf_lnk_iops; + set_nlink(inode, 1); + } else { + inode->i_mode = sbi->o.fmode_set ? sbi->o.fmode : mode; + inode->i_mode &= ~sbi->o.fmask; + inode->i_mode |= S_IFREG; + inode->i_op = &vboxsf_reg_iops; + inode->i_fop = &vboxsf_reg_fops; + set_nlink(inode, 1); + } + + inode->i_uid = sbi->o.uid; + inode->i_gid = sbi->o.gid; + + inode->i_size = info->size; + inode->i_blkbits = 12; + /* i_blocks always in units of 512 bytes! 
*/ + allocated = info->allocated + 511; + do_div(allocated, 512); + inode->i_blocks = allocated; + + inode->i_atime = ns_to_timespec64( + info->access_time.ns_relative_to_unix_epoch); + inode->i_ctime = ns_to_timespec64( + info->change_time.ns_relative_to_unix_epoch); + inode->i_mtime = ns_to_timespec64( + info->modification_time.ns_relative_to_unix_epoch); +} + +int vboxsf_create_at_dentry(struct dentry *dentry, + struct shfl_createparms *params) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb); + struct shfl_string *path; + int err; + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return PTR_ERR(path); + + err = vboxsf_create(sbi->root, path, params); + __putname(path); + + return err; +} + +int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path, + struct shfl_fsobjinfo *info) +{ + struct shfl_createparms params = {}; + int err; + + params.handle = SHFL_HANDLE_NIL; + params.create_flags = SHFL_CF_LOOKUP | SHFL_CF_ACT_FAIL_IF_NEW; + + err = vboxsf_create(sbi->root, path, ¶ms); + if (err) + return err; + + if (params.result != SHFL_FILE_EXISTS) + return -ENOENT; + + if (info) + *info = params.info; + + return 0; +} + +int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb); + struct shfl_string *path; + int err; + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return PTR_ERR(path); + + err = vboxsf_stat(sbi, path, info); + __putname(path); + return err; +} + +int vboxsf_inode_revalidate(struct dentry *dentry) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb); + struct vboxsf_inode *sf_i; + struct shfl_fsobjinfo info; + struct timespec64 prev_mtime; + struct inode *inode; + int err; + + if (!dentry || !d_really_is_positive(dentry)) + return -EINVAL; + + inode = d_inode(dentry); + prev_mtime = inode->i_mtime; + sf_i = VBOXSF_I(inode); + if (!sf_i->force_restat) { + if (time_before(jiffies, dentry->d_time + sbi->o.ttl)) + return 0; 
+ } + + err = vboxsf_stat_dentry(dentry, &info); + if (err) + return err; + + dentry->d_time = jiffies; + sf_i->force_restat = 0; + vboxsf_init_inode(sbi, inode, &info); + + /* + * If the file was changed on the host side we need to invalidate the + * page-cache for it. Note this also gets triggered by our own writes, + * this is unavoidable. + */ + if (timespec64_compare(&inode->i_mtime, &prev_mtime) > 0) + invalidate_inode_pages2(inode->i_mapping); + + return 0; +} + +int vboxsf_getattr(const struct path *path, struct kstat *kstat, + u32 request_mask, unsigned int flags) +{ + int err; + struct dentry *dentry = path->dentry; + struct inode *inode = d_inode(dentry); + struct vboxsf_inode *sf_i = VBOXSF_I(inode); + + switch (flags & AT_STATX_SYNC_TYPE) { + case AT_STATX_DONT_SYNC: + err = 0; + break; + case AT_STATX_FORCE_SYNC: + sf_i->force_restat = 1; + /* fall-through */ + default: + err = vboxsf_inode_revalidate(dentry); + } + if (err) + return err; + + generic_fillattr(d_inode(dentry), kstat); + return 0; +} + +int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr) +{ + struct vboxsf_inode *sf_i = VBOXSF_I(d_inode(dentry)); + struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb); + struct shfl_createparms params = {}; + struct shfl_fsobjinfo info = {}; + u32 buf_len; + int err; + + params.handle = SHFL_HANDLE_NIL; + params.create_flags = SHFL_CF_ACT_OPEN_IF_EXISTS | + SHFL_CF_ACT_FAIL_IF_NEW | + SHFL_CF_ACCESS_ATTR_WRITE; + + /* this is at least required for Posix hosts */ + if (iattr->ia_valid & ATTR_SIZE) + params.create_flags |= SHFL_CF_ACCESS_WRITE; + + err = vboxsf_create_at_dentry(dentry, ¶ms); + if (err || params.result != SHFL_FILE_EXISTS) + return err ? err : -ENOENT; + +#define mode_set(r) ((iattr->ia_mode & (S_##r)) ? SHFL_UNIX_##r : 0) + + /* + * Setting the file size and setting the other attributes has to + * be handled separately. 
+ */ + if (iattr->ia_valid & (ATTR_MODE | ATTR_ATIME | ATTR_MTIME)) { + if (iattr->ia_valid & ATTR_MODE) { + info.attr.mode = mode_set(IRUSR); + info.attr.mode |= mode_set(IWUSR); + info.attr.mode |= mode_set(IXUSR); + info.attr.mode |= mode_set(IRGRP); + info.attr.mode |= mode_set(IWGRP); + info.attr.mode |= mode_set(IXGRP); + info.attr.mode |= mode_set(IROTH); + info.attr.mode |= mode_set(IWOTH); + info.attr.mode |= mode_set(IXOTH); + + if (iattr->ia_mode & S_IFDIR) + info.attr.mode |= SHFL_TYPE_DIRECTORY; + else + info.attr.mode |= SHFL_TYPE_FILE; + } + + if (iattr->ia_valid & ATTR_ATIME) + info.access_time.ns_relative_to_unix_epoch = + timespec64_to_ns(&iattr->ia_atime); + + if (iattr->ia_valid & ATTR_MTIME) + info.modification_time.ns_relative_to_unix_epoch = + timespec64_to_ns(&iattr->ia_mtime); + + /* + * Ignore ctime (inode change time) as it can't be set + * from userland anyway. + */ + + buf_len = sizeof(info); + err = vboxsf_fsinfo(sbi->root, params.handle, + SHFL_INFO_SET | SHFL_INFO_FILE, &buf_len, + &info); + if (err) { + vboxsf_close(sbi->root, params.handle); + return err; + } + + /* the host may have given us different attr then requested */ + sf_i->force_restat = 1; + } + +#undef mode_set + + if (iattr->ia_valid & ATTR_SIZE) { + memset(&info, 0, sizeof(info)); + info.size = iattr->ia_size; + buf_len = sizeof(info); + err = vboxsf_fsinfo(sbi->root, params.handle, + SHFL_INFO_SET | SHFL_INFO_SIZE, &buf_len, + &info); + if (err) { + vboxsf_close(sbi->root, params.handle); + return err; + } + + /* the host may have given us different attr then requested */ + sf_i->force_restat = 1; + } + + vboxsf_close(sbi->root, params.handle); + + /* Update the inode with what the host has actually given us. */ + if (sf_i->force_restat) + vboxsf_inode_revalidate(dentry); + + return 0; +} + +/* + * [dentry] contains string encoded in coding system that corresponds + * to [sbi]->nls, we must convert it to UTF8 here. 
+ * Returns a shfl_string allocated through __getname (must be freed using + * __putname), or an ERR_PTR on error. + */ +struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi, + struct dentry *dentry) +{ + struct shfl_string *shfl_path; + int path_len, out_len, nb; + char *buf, *path; + wchar_t uni; + u8 *out; + + buf = __getname(); + if (!buf) + return ERR_PTR(-ENOMEM); + + path = dentry_path_raw(dentry, buf, PATH_MAX); + if (IS_ERR(path)) { + __putname(buf); + return (struct shfl_string *)path; + } + path_len = strlen(path); + + if (sbi->nls) { + shfl_path = __getname(); + if (!shfl_path) { + __putname(buf); + return ERR_PTR(-ENOMEM); + } + + out = shfl_path->string.utf8; + out_len = PATH_MAX - SHFLSTRING_HEADER_SIZE - 1; + + while (path_len) { + nb = sbi->nls->char2uni(path, path_len, &uni); + if (nb < 0) { + __putname(shfl_path); + __putname(buf); + return ERR_PTR(-EINVAL); + } + path += nb; + path_len -= nb; + + nb = utf32_to_utf8(uni, out, out_len); + if (nb < 0) { + __putname(shfl_path); + __putname(buf); + return ERR_PTR(-ENAMETOOLONG); + } + out += nb; + out_len -= nb; + } + *out = 0; + shfl_path->length = out - shfl_path->string.utf8; + shfl_path->size = shfl_path->length + 1; + __putname(buf); + } else { + if ((SHFLSTRING_HEADER_SIZE + path_len + 1) > PATH_MAX) { + __putname(buf); + return ERR_PTR(-ENAMETOOLONG); + } + /* + * dentry_path stores the name at the end of buf, but the + * shfl_string string we return must be properly aligned. 
+ */ + shfl_path = (struct shfl_string *)buf; + memmove(shfl_path->string.utf8, path, path_len); + shfl_path->string.utf8[path_len] = 0; + shfl_path->length = path_len; + shfl_path->size = path_len + 1; + } + + return shfl_path; +} + +int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len, + const unsigned char *utf8_name, size_t utf8_len) +{ + const char *in; + char *out; + size_t out_len; + size_t out_bound_len; + size_t in_bound_len; + + in = utf8_name; + in_bound_len = utf8_len; + + out = name; + out_len = 0; + /* Reserve space for terminating 0 */ + out_bound_len = name_bound_len - 1; + + while (in_bound_len) { + int nb; + unicode_t uni; + + nb = utf8_to_utf32(in, in_bound_len, &uni); + if (nb < 0) + return -EINVAL; + + in += nb; + in_bound_len -= nb; + + nb = sbi->nls->uni2char(uni, out, out_bound_len); + if (nb < 0) + return nb; + + out += nb; + out_bound_len -= nb; + out_len += nb; + } + + *out = 0; + + return 0; +} + +static struct vboxsf_dir_buf *vboxsf_dir_buf_alloc(struct list_head *list) +{ + struct vboxsf_dir_buf *b; + + b = kmalloc(sizeof(*b), GFP_KERNEL); + if (!b) + return NULL; + + b->buf = kmalloc(DIR_BUFFER_SIZE, GFP_KERNEL); + if (!b->buf) { + kfree(b); + return NULL; + } + + b->entries = 0; + b->used = 0; + b->free = DIR_BUFFER_SIZE; + list_add(&b->head, list); + + return b; +} + +static void vboxsf_dir_buf_free(struct vboxsf_dir_buf *b) +{ + list_del(&b->head); + kfree(b->buf); + kfree(b); +} + +struct vboxsf_dir_info *vboxsf_dir_info_alloc(void) +{ + struct vboxsf_dir_info *p; + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return NULL; + + INIT_LIST_HEAD(&p->info_list); + return p; +} + +void vboxsf_dir_info_free(struct vboxsf_dir_info *p) +{ + struct list_head *list, *pos, *tmp; + + list = &p->info_list; + list_for_each_safe(pos, tmp, list) { + struct vboxsf_dir_buf *b; + + b = list_entry(pos, struct vboxsf_dir_buf, head); + vboxsf_dir_buf_free(b); + } + kfree(p); +} + +int vboxsf_dir_read_all(struct vboxsf_sbi 
*sbi, struct vboxsf_dir_info *sf_d, + u64 handle) +{ + struct vboxsf_dir_buf *b; + u32 entries, size; + int err = 0; + void *buf; + + /* vboxsf_dirinfo returns 1 on end of dir */ + while (err == 0) { + b = vboxsf_dir_buf_alloc(&sf_d->info_list); + if (!b) { + err = -ENOMEM; + break; + } + + buf = b->buf; + size = b->free; + + err = vboxsf_dirinfo(sbi->root, handle, NULL, 0, 0, + &size, buf, &entries); + if (err < 0) + break; + + b->entries += entries; + b->free -= size; + b->used += size; + } + + if (b && b->used == 0) + vboxsf_dir_buf_free(b); + + /* -EILSEQ means the host could not translate a filename, ignore */ + if (err > 0 || err == -EILSEQ) + err = 0; + + return err; +} diff --git a/drivers/staging/vboxsf/vboxsf_wrappers.c b/drivers/staging/vboxsf/vboxsf_wrappers.c new file mode 100644 index 000000000000..bfc78a097dae --- /dev/null +++ b/drivers/staging/vboxsf/vboxsf_wrappers.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: MIT +/* + * Wrapper functions for the shfl host calls. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include +#include +#include +#include +#include "vfsmod.h" + +#define SHFL_REQUEST \ + (VMMDEV_REQUESTOR_KERNEL | VMMDEV_REQUESTOR_USR_DRV_OTHER | \ + VMMDEV_REQUESTOR_CON_DONT_KNOW | VMMDEV_REQUESTOR_TRUST_NOT_GIVEN) + +static u32 vboxsf_client_id; + +int vboxsf_connect(void) +{ + struct vbg_dev *gdev; + struct vmmdev_hgcm_service_location loc; + int err, vbox_status; + + loc.type = VMMDEV_HGCM_LOC_LOCALHOST_EXISTING; + strcpy(loc.u.localhost.service_name, "VBoxSharedFolders"); + + gdev = vbg_get_gdev(); + if (IS_ERR(gdev)) + return -ENODEV; /* No guest-device */ + + err = vbg_hgcm_connect(gdev, SHFL_REQUEST, &loc, + &vboxsf_client_id, &vbox_status); + vbg_put_gdev(gdev); + + return err ? 
err : vbg_status_code_to_errno(vbox_status); +} + +void vboxsf_disconnect(void) +{ + struct vbg_dev *gdev; + int vbox_status; + + gdev = vbg_get_gdev(); + if (IS_ERR(gdev)) + return; /* guest-device is gone, already disconnected */ + + vbg_hgcm_disconnect(gdev, SHFL_REQUEST, vboxsf_client_id, &vbox_status); + vbg_put_gdev(gdev); +} + +static int vboxsf_call(u32 function, void *parms, u32 parm_count, int *status) +{ + struct vbg_dev *gdev; + int err, vbox_status; + + gdev = vbg_get_gdev(); + if (IS_ERR(gdev)) + return -ESHUTDOWN; /* guest-dev removed underneath us */ + + err = vbg_hgcm_call(gdev, SHFL_REQUEST, vboxsf_client_id, function, + U32_MAX, parms, parm_count, &vbox_status); + vbg_put_gdev(gdev); + + if (err < 0) + return err; + + if (status) + *status = vbox_status; + + return vbg_status_code_to_errno(vbox_status); +} + +int vboxsf_map_folder(struct shfl_string *folder_name, u32 *root) +{ + struct shfl_map_folder parms; + int err, status; + + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL; + parms.path.u.pointer.size = shfl_string_buf_size(folder_name); + parms.path.u.pointer.u.linear_addr = (uintptr_t)folder_name; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = 0; + + parms.delimiter.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.delimiter.u.value32 = '/'; + + parms.case_sensitive.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.case_sensitive.u.value32 = 1; + + err = vboxsf_call(SHFL_FN_MAP_FOLDER, &parms, SHFL_CPARMS_MAP_FOLDER, + &status); + if (err == -ENOSYS && status == VERR_NOT_IMPLEMENTED) + vbg_err("%s: Error host is too old\n", __func__); + + *root = parms.root.u.value32; + return err; +} + +int vboxsf_unmap_folder(u32 root) +{ + struct shfl_unmap_folder parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + return vboxsf_call(SHFL_FN_UNMAP_FOLDER, &parms, + SHFL_CPARMS_UNMAP_FOLDER, NULL); +} + +/** + * vboxsf_create - Create a new file or folder + * @root: Root of the shared 
folder in which to create the file + * @parsed_path: The path of the file or folder relative to the shared folder + * @param: create_parms Parameters for file/folder creation. + * + * Create a new file or folder or open an existing one in a shared folder. + * Note this function always returns 0 / success unless an exceptional condition + * occurs - out of memory, invalid arguments, etc. If the file or folder could + * not be opened or created, create_parms->handle will be set to + * SHFL_HANDLE_NIL on return. In this case the value in create_parms->result + * provides information as to why (e.g. SHFL_FILE_EXISTS), create_parms->result + * is also set on success as additional information. + * + * Returns: + * 0 or negative errno value. + */ +int vboxsf_create(u32 root, struct shfl_string *parsed_path, + struct shfl_createparms *create_parms) +{ + struct shfl_create parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL; + parms.path.u.pointer.size = shfl_string_buf_size(parsed_path); + parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path; + + parms.parms.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL; + parms.parms.u.pointer.size = sizeof(struct shfl_createparms); + parms.parms.u.pointer.u.linear_addr = (uintptr_t)create_parms; + + return vboxsf_call(SHFL_FN_CREATE, &parms, SHFL_CPARMS_CREATE, NULL); +} + +int vboxsf_close(u32 root, u64 handle) +{ + struct shfl_close parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + + return vboxsf_call(SHFL_FN_CLOSE, &parms, SHFL_CPARMS_CLOSE, NULL); +} + +int vboxsf_remove(u32 root, struct shfl_string *parsed_path, u32 flags) +{ + struct shfl_remove parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + 
parms.path.u.pointer.size = shfl_string_buf_size(parsed_path); + parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path; + + parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.flags.u.value32 = flags; + + return vboxsf_call(SHFL_FN_REMOVE, &parms, SHFL_CPARMS_REMOVE, NULL); +} + +int vboxsf_rename(u32 root, struct shfl_string *src_path, + struct shfl_string *dest_path, u32 flags) +{ + struct shfl_rename parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.src.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.src.u.pointer.size = shfl_string_buf_size(src_path); + parms.src.u.pointer.u.linear_addr = (uintptr_t)src_path; + + parms.dest.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.dest.u.pointer.size = shfl_string_buf_size(dest_path); + parms.dest.u.pointer.u.linear_addr = (uintptr_t)dest_path; + + parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.flags.u.value32 = flags; + + return vboxsf_call(SHFL_FN_RENAME, &parms, SHFL_CPARMS_RENAME, NULL); +} + +int vboxsf_read(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf) +{ + struct shfl_read parms; + int err; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + parms.offset.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.offset.u.value64 = offset; + parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.cb.u.value32 = *buf_len; + parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT; + parms.buffer.u.pointer.size = *buf_len; + parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf; + + err = vboxsf_call(SHFL_FN_READ, &parms, SHFL_CPARMS_READ, NULL); + + *buf_len = parms.cb.u.value32; + return err; +} + +int vboxsf_write(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf) +{ + struct shfl_write parms; + int err; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = 
VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + parms.offset.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.offset.u.value64 = offset; + parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.cb.u.value32 = *buf_len; + parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.buffer.u.pointer.size = *buf_len; + parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf; + + err = vboxsf_call(SHFL_FN_WRITE, &parms, SHFL_CPARMS_WRITE, NULL); + + *buf_len = parms.cb.u.value32; + return err; +} + +/* Returns 0 on success, 1 on end-of-dir, negative errno otherwise */ +int vboxsf_dirinfo(u32 root, u64 handle, + struct shfl_string *parsed_path, u32 flags, u32 index, + u32 *buf_len, struct shfl_dirinfo *buf, u32 *file_count) +{ + struct shfl_list parms; + int err, status; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.flags.u.value32 = flags; + parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.cb.u.value32 = *buf_len; + if (parsed_path) { + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.path.u.pointer.size = shfl_string_buf_size(parsed_path); + parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path; + } else { + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_IN; + parms.path.u.pointer.size = 0; + parms.path.u.pointer.u.linear_addr = 0; + } + + parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT; + parms.buffer.u.pointer.size = *buf_len; + parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf; + + parms.resume_point.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.resume_point.u.value32 = index; + parms.file_count.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.file_count.u.value32 = 0; /* out parameter only */ + + err = vboxsf_call(SHFL_FN_LIST, &parms, SHFL_CPARMS_LIST, &status); + if (err == -ENODATA && status == VERR_NO_MORE_FILES) + err = 1; + 
+ *buf_len = parms.cb.u.value32; + *file_count = parms.file_count.u.value32; + return err; +} + +int vboxsf_fsinfo(u32 root, u64 handle, u32 flags, + u32 *buf_len, void *buf) +{ + struct shfl_information parms; + int err; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.flags.u.value32 = flags; + parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.cb.u.value32 = *buf_len; + parms.info.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL; + parms.info.u.pointer.size = *buf_len; + parms.info.u.pointer.u.linear_addr = (uintptr_t)buf; + + err = vboxsf_call(SHFL_FN_INFORMATION, &parms, SHFL_CPARMS_INFORMATION, + NULL); + + *buf_len = parms.cb.u.value32; + return err; +} + +int vboxsf_readlink(u32 root, struct shfl_string *parsed_path, + u32 buf_len, u8 *buf) +{ + struct shfl_readLink parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.path.u.pointer.size = shfl_string_buf_size(parsed_path); + parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path; + + parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT; + parms.buffer.u.pointer.size = buf_len; + parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf; + + return vboxsf_call(SHFL_FN_READLINK, &parms, SHFL_CPARMS_READLINK, + NULL); +} + +int vboxsf_symlink(u32 root, struct shfl_string *new_path, + struct shfl_string *old_path, struct shfl_fsobjinfo *buf) +{ + struct shfl_symlink parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.new_path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.new_path.u.pointer.size = shfl_string_buf_size(new_path); + parms.new_path.u.pointer.u.linear_addr = (uintptr_t)new_path; + + parms.old_path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + 
parms.old_path.u.pointer.size = shfl_string_buf_size(old_path); + parms.old_path.u.pointer.u.linear_addr = (uintptr_t)old_path; + + parms.info.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT; + parms.info.u.pointer.size = sizeof(struct shfl_fsobjinfo); + parms.info.u.pointer.u.linear_addr = (uintptr_t)buf; + + return vboxsf_call(SHFL_FN_SYMLINK, &parms, SHFL_CPARMS_SYMLINK, NULL); +} + +int vboxsf_set_utf8(void) +{ + return vboxsf_call(SHFL_FN_SET_UTF8, NULL, 0, NULL); +} + +int vboxsf_set_symlinks(void) +{ + return vboxsf_call(SHFL_FN_SET_SYMLINKS, NULL, 0, NULL); +} diff --git a/drivers/staging/vboxsf/vfsmod.h b/drivers/staging/vboxsf/vfsmod.h new file mode 100644 index 000000000000..de650d65fbe4 --- /dev/null +++ b/drivers/staging/vboxsf/vfsmod.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: MIT */ +/* + * VirtualBox Guest Shared Folders support: module header. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#ifndef VFSMOD_H +#define VFSMOD_H + +#include +#include +#include +#include "shfl_hostintf.h" + +#define DIR_BUFFER_SIZE SZ_16K + +/* The cast is to prevent assignment of void * to pointers of arbitrary type */ +#define VBOXSF_SBI(sb) ((struct vboxsf_sbi *)(sb)->s_fs_info) +#define VBOXSF_I(i) container_of(i, struct vboxsf_inode, vfs_inode) + +struct vboxsf_options { + unsigned long ttl; + kuid_t uid; + kgid_t gid; + bool dmode_set; + bool fmode_set; + umode_t dmode; + umode_t fmode; + umode_t dmask; + umode_t fmask; +}; + +struct vboxsf_fs_context { + struct vboxsf_options o; + char *nls_name; +}; + +/* per-shared folder information */ +struct vboxsf_sbi { + struct vboxsf_options o; + struct shfl_fsobjinfo root_info; + struct idr ino_idr; + spinlock_t ino_idr_lock; /* This protects ino_idr */ + struct nls_table *nls; + u32 next_generation; + u32 root; + int bdi_id; +}; + +/* per-inode information */ +struct vboxsf_inode { + /* some information was changed, update data on next revalidate */ + int force_restat; + /* list of open handles for this 
inode + lock protecting it */ + struct list_head handle_list; + /* This mutex protects handle_list accesses */ + struct mutex handle_list_mutex; + /* The VFS inode struct */ + struct inode vfs_inode; +}; + +struct vboxsf_dir_info { + struct list_head info_list; +}; + +struct vboxsf_dir_buf { + size_t entries; + size_t free; + size_t used; + void *buf; + struct list_head head; +}; + +/* globals */ +extern const struct inode_operations vboxsf_dir_iops; +extern const struct inode_operations vboxsf_lnk_iops; +extern const struct inode_operations vboxsf_reg_iops; +extern const struct file_operations vboxsf_dir_fops; +extern const struct file_operations vboxsf_reg_fops; +extern const struct address_space_operations vboxsf_reg_aops; +extern const struct dentry_operations vboxsf_dentry_ops; + +/* from utils.c */ +struct inode *vboxsf_new_inode(struct super_block *sb); +void vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode, + const struct shfl_fsobjinfo *info); +int vboxsf_create_at_dentry(struct dentry *dentry, + struct shfl_createparms *params); +int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path, + struct shfl_fsobjinfo *info); +int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info); +int vboxsf_inode_revalidate(struct dentry *dentry); +int vboxsf_getattr(const struct path *path, struct kstat *kstat, + u32 request_mask, unsigned int query_flags); +int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr); +struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi, + struct dentry *dentry); +int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len, + const unsigned char *utf8_name, size_t utf8_len); +struct vboxsf_dir_info *vboxsf_dir_info_alloc(void); +void vboxsf_dir_info_free(struct vboxsf_dir_info *p); +int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d, + u64 handle); + +/* from vboxsf_wrappers.c */ +int vboxsf_connect(void); +void vboxsf_disconnect(void); + +int 
vboxsf_create(u32 root, struct shfl_string *parsed_path, + struct shfl_createparms *create_parms); + +int vboxsf_close(u32 root, u64 handle); +int vboxsf_remove(u32 root, struct shfl_string *parsed_path, u32 flags); +int vboxsf_rename(u32 root, struct shfl_string *src_path, + struct shfl_string *dest_path, u32 flags); + +int vboxsf_read(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf); +int vboxsf_write(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf); + +int vboxsf_dirinfo(u32 root, u64 handle, + struct shfl_string *parsed_path, u32 flags, u32 index, + u32 *buf_len, struct shfl_dirinfo *buf, u32 *file_count); +int vboxsf_fsinfo(u32 root, u64 handle, u32 flags, + u32 *buf_len, void *buf); + +int vboxsf_map_folder(struct shfl_string *folder_name, u32 *root); +int vboxsf_unmap_folder(u32 root); + +int vboxsf_readlink(u32 root, struct shfl_string *parsed_path, + u32 buf_len, u8 *buf); +int vboxsf_symlink(u32 root, struct shfl_string *new_path, + struct shfl_string *old_path, struct shfl_fsobjinfo *buf); + +int vboxsf_set_utf8(void); +int vboxsf_set_symlinks(void); + +#endif -- cgit v1.2.3 From d5798141fd54cea074c3429d5803f6c41ade0ca8 Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Wed, 30 Oct 2019 08:16:43 +0000 Subject: gfs2: Fix initialisation of args for remount When gfs2 was converted to use fs_context, the initialisation of the mount args structure to the currently active args was lost with the removal of gfs2_remount_fs(), so the checks of the new args on remount became checks against the default values instead of the current ones. This caused unexpected remount behaviour and test failures (xfstests generic/294, generic/306 and generic/452). Reinstate the args initialisation, this time in gfs2_init_fs_context() and conditional upon fc->purpose, as that's the only time we get control before the mount args are parsed in the remount process. 
Fixes: 1f52aa08d12f ("gfs2: Convert gfs2 to fs_context") Signed-off-by: Andrew Price Signed-off-by: Andreas Gruenbacher --- fs/gfs2/ops_fstype.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index dc61af2c4d5e..18daf494abab 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1540,17 +1540,23 @@ static int gfs2_init_fs_context(struct fs_context *fc) { struct gfs2_args *args; - args = kzalloc(sizeof(*args), GFP_KERNEL); + args = kmalloc(sizeof(*args), GFP_KERNEL); if (args == NULL) return -ENOMEM; - args->ar_quota = GFS2_QUOTA_DEFAULT; - args->ar_data = GFS2_DATA_DEFAULT; - args->ar_commit = 30; - args->ar_statfs_quantum = 30; - args->ar_quota_quantum = 60; - args->ar_errors = GFS2_ERRORS_DEFAULT; + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + struct gfs2_sbd *sdp = fc->root->d_sb->s_fs_info; + *args = sdp->sd_args; + } else { + memset(args, 0, sizeof(*args)); + args->ar_quota = GFS2_QUOTA_DEFAULT; + args->ar_data = GFS2_DATA_DEFAULT; + args->ar_commit = 30; + args->ar_statfs_quantum = 30; + args->ar_quota_quantum = 60; + args->ar_errors = GFS2_ERRORS_DEFAULT; + } fc->fs_private = args; fc->ops = &gfs2_context_ops; return 0; -- cgit v1.2.3 From f5c8d290634a470600e1ce61733ec54d05a897e8 Mon Sep 17 00:00:00 2001 From: Sanket Parmar Date: Tue, 29 Oct 2019 12:24:41 +0000 Subject: usb: cdns3: gadget: reset EP_CLAIMED flag while unloading EP_CLAIMED flag is used to track the claimed endpoints. While unloading the module, Reset EP_CLAIMED flag for all enabled endpoints. So that it can be reused. 
Signed-off-by: Sanket Parmar Acked-by: Peter Chen Reviewed-by: Roger Quadros Acked-by: Felipe Balbi Link: https://lore.kernel.org/r/20191029122441.5816-1-sparmar@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c index d9e7f2d06098..8421fc028c40 100644 --- a/drivers/usb/cdns3/gadget.c +++ b/drivers/usb/cdns3/gadget.c @@ -2379,6 +2379,8 @@ static int cdns3_gadget_udc_stop(struct usb_gadget *gadget) writel(EP_CMD_EPRST, &priv_dev->regs->ep_cmd); readl_poll_timeout_atomic(&priv_dev->regs->ep_cmd, val, !(val & EP_CMD_EPRST), 1, 100); + + priv_ep->flags &= ~EP_CLAIMED; } /* disable interrupt for device */ -- cgit v1.2.3 From 94e259f81a714c96f381d362228fc4743db49742 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 30 Oct 2019 14:16:07 +0200 Subject: usb: cdns3: gadget: Fix g_audio use case when connected to Super-Speed host Take into account gadget driver's speed limit when programming controller speed. 
Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Roger Quadros Acked-by: Peter Chen Link: https://lore.kernel.org/r/20191030121607.21739-1-rogerq@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/gadget.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c index 8421fc028c40..4c1e75509303 100644 --- a/drivers/usb/cdns3/gadget.c +++ b/drivers/usb/cdns3/gadget.c @@ -2343,9 +2343,35 @@ static int cdns3_gadget_udc_start(struct usb_gadget *gadget, { struct cdns3_device *priv_dev = gadget_to_cdns3_device(gadget); unsigned long flags; + enum usb_device_speed max_speed = driver->max_speed; spin_lock_irqsave(&priv_dev->lock, flags); priv_dev->gadget_driver = driver; + + /* limit speed if necessary */ + max_speed = min(driver->max_speed, gadget->max_speed); + + switch (max_speed) { + case USB_SPEED_FULL: + writel(USB_CONF_SFORCE_FS, &priv_dev->regs->usb_conf); + writel(USB_CONF_USB3DIS, &priv_dev->regs->usb_conf); + break; + case USB_SPEED_HIGH: + writel(USB_CONF_USB3DIS, &priv_dev->regs->usb_conf); + break; + case USB_SPEED_SUPER: + break; + default: + dev_err(priv_dev->dev, + "invalid maximum_speed parameter %d\n", + max_speed); + /* fall through */ + case USB_SPEED_UNKNOWN: + /* default to superspeed */ + max_speed = USB_SPEED_SUPER; + break; + } + cdns3_gadget_config(priv_dev); spin_unlock_irqrestore(&priv_dev->lock, flags); return 0; @@ -2575,12 +2601,7 @@ static int cdns3_gadget_start(struct cdns3 *cdns) /* Check the maximum_speed parameter */ switch (max_speed) { case USB_SPEED_FULL: - writel(USB_CONF_SFORCE_FS, &priv_dev->regs->usb_conf); - writel(USB_CONF_USB3DIS, &priv_dev->regs->usb_conf); - break; case USB_SPEED_HIGH: - writel(USB_CONF_USB3DIS, &priv_dev->regs->usb_conf); - break; case USB_SPEED_SUPER: break; default: -- cgit v1.2.3 From 302d5a80d232134246032bc4263fd7facdddb8f1 Mon Sep 17 00:00:00 2001 From: Takashi 
Iwai Date: Tue, 29 Oct 2019 21:41:20 +0100 Subject: ALSA: hda - Fix mutex deadlock in HDMI codec driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit ade49db337a9 ("ALSA: hda/hdmi - Allow audio component for AMD/ATI and Nvidia HDMI") introduced the spec->pcm_lock mutex lock to the whole generic_hdmi_init() function for avoiding the race with the audio component registration. However, this caused a dead lock when the unsolicited event is handled without the audio component, as the codec gets runtime-resumed in hdmi_present_sense() which is already inside the spec->pcm_lock in its caller. For avoiding this deadlock, add a new mutex only for the audio component binding that is used in both generic_hdmi_init() and the audio notifier registration where the jack callbacks are handled / re-registered. Fixes: ade49db337a9 ("ALSA: hda/hdmi - Allow audio component for AMD/ATI and Nvidia HDMI") Reported-and-tested-by: Ville Syrjälä Link: https://lore.kernel.org/r/s5himo7i89i.wl-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 795cbda32cbb..b72553710ffb 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -145,6 +145,7 @@ struct hdmi_spec { struct snd_array pins; /* struct hdmi_spec_per_pin */ struct hdmi_pcm pcm_rec[16]; struct mutex pcm_lock; + struct mutex bind_lock; /* for audio component binding */ /* pcm_bitmap means which pcms have been assigned to pins*/ unsigned long pcm_bitmap; int pcm_used; /* counter of pcm_rec[] */ @@ -2258,7 +2259,7 @@ static int generic_hdmi_init(struct hda_codec *codec) struct hdmi_spec *spec = codec->spec; int pin_idx; - mutex_lock(&spec->pcm_lock); + mutex_lock(&spec->bind_lock); spec->use_jack_detect = !codec->jackpoll_interval; for (pin_idx = 0; pin_idx < spec->num_pins; pin_idx++) { struct 
hdmi_spec_per_pin *per_pin = get_pin(spec, pin_idx); @@ -2275,7 +2276,7 @@ static int generic_hdmi_init(struct hda_codec *codec) snd_hda_jack_detect_enable_callback(codec, pin_nid, jack_callback); } - mutex_unlock(&spec->pcm_lock); + mutex_unlock(&spec->bind_lock); return 0; } @@ -2382,6 +2383,7 @@ static int alloc_generic_hdmi(struct hda_codec *codec) spec->ops = generic_standard_hdmi_ops; spec->dev_num = 1; /* initialize to 1 */ mutex_init(&spec->pcm_lock); + mutex_init(&spec->bind_lock); snd_hdac_register_chmap_ops(&codec->core, &spec->chmap); spec->chmap.ops.get_chmap = hdmi_get_chmap; @@ -2451,7 +2453,7 @@ static void generic_acomp_notifier_set(struct drm_audio_component *acomp, int i; spec = container_of(acomp->audio_ops, struct hdmi_spec, drm_audio_ops); - mutex_lock(&spec->pcm_lock); + mutex_lock(&spec->bind_lock); spec->use_acomp_notifier = use_acomp; spec->codec->relaxed_resume = use_acomp; /* reprogram each jack detection logic depending on the notifier */ @@ -2461,7 +2463,7 @@ static void generic_acomp_notifier_set(struct drm_audio_component *acomp, get_pin(spec, i)->pin_nid, use_acomp); } - mutex_unlock(&spec->pcm_lock); + mutex_unlock(&spec->bind_lock); } /* enable / disable the notifier via master bind / unbind */ -- cgit v1.2.3 From f37f05503575c59020dacd36e999f4e8b3dbc115 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 27 Oct 2019 20:53:08 +0100 Subject: mt76: mt76x2e: disable pcie_aspm by default On some devices (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and instability. Since mt76x2 series does not manage PCIE PS states, first we try to disable ASPM using pci_disable_link_state. If it fails, we will disable PCIE PS by configuring PCI registers. 
This patch has been successfully tested on U7612E-H1 mini-PCIe card Tested-by: Oleksandr Natalenko Signed-off-by: Felix Fietkau Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/Makefile | 2 ++ drivers/net/wireless/mediatek/mt76/mt76.h | 1 + drivers/net/wireless/mediatek/mt76/mt76x2/pci.c | 2 ++ drivers/net/wireless/mediatek/mt76/pci.c | 46 +++++++++++++++++++++++++ 4 files changed, 51 insertions(+) create mode 100644 drivers/net/wireless/mediatek/mt76/pci.c diff --git a/drivers/net/wireless/mediatek/mt76/Makefile b/drivers/net/wireless/mediatek/mt76/Makefile index 4d03596e891f..d7a1ddc9e407 100644 --- a/drivers/net/wireless/mediatek/mt76/Makefile +++ b/drivers/net/wireless/mediatek/mt76/Makefile @@ -8,6 +8,8 @@ mt76-y := \ mmio.o util.o trace.o dma.o mac80211.o debugfs.o eeprom.o \ tx.o agg-rx.o mcu.o +mt76-$(CONFIG_PCI) += pci.o + mt76-usb-y := usb.o usb_trace.o CFLAGS_trace.o := -I$(src) diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 570c159515a0..dc468ed9434a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val, #define mt76_poll_msec(dev, ...) 
__mt76_poll_msec(&((dev)->mt76), __VA_ARGS__) void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs); +void mt76_pci_disable_aspm(struct pci_dev *pdev); static inline u16 mt76_chip(struct mt76_dev *dev) { diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c index 73c3104f8858..cf611d1b817c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* RG_SSUSB_CDR_BR_PE1D = 0x3 */ mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3); + mt76_pci_disable_aspm(pdev); + return 0; error: diff --git a/drivers/net/wireless/mediatek/mt76/pci.c b/drivers/net/wireless/mediatek/mt76/pci.c new file mode 100644 index 000000000000..04c5a692bc85 --- /dev/null +++ b/drivers/net/wireless/mediatek/mt76/pci.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: ISC +/* + * Copyright (C) 2019 Lorenzo Bianconi + */ + +#include + +void mt76_pci_disable_aspm(struct pci_dev *pdev) +{ + struct pci_dev *parent = pdev->bus->self; + u16 aspm_conf, parent_aspm_conf = 0; + + pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf); + aspm_conf &= PCI_EXP_LNKCTL_ASPMC; + if (parent) { + pcie_capability_read_word(parent, PCI_EXP_LNKCTL, + &parent_aspm_conf); + parent_aspm_conf &= PCI_EXP_LNKCTL_ASPMC; + } + + if (!aspm_conf && (!parent || !parent_aspm_conf)) { + /* aspm already disabled */ + return; + } + + dev_info(&pdev->dev, "disabling ASPM %s %s\n", + (aspm_conf & PCI_EXP_LNKCTL_ASPM_L0S) ? "L0s" : "", + (aspm_conf & PCI_EXP_LNKCTL_ASPM_L1) ? "L1" : ""); + + if (IS_ENABLED(CONFIG_PCIEASPM)) { + int err; + + err = pci_disable_link_state(pdev, aspm_conf); + if (!err) + return; + } + + /* both device and parent should have the same ASPM setting. + * disable ASPM in downstream component first and then upstream. 
+ */ + pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, aspm_conf); + if (parent) + pcie_capability_clear_word(parent, PCI_EXP_LNKCTL, + aspm_conf); +} +EXPORT_SYMBOL_GPL(mt76_pci_disable_aspm); -- cgit v1.2.3 From 7bd0650be63cbb9e45e394d689c81365fe48e495 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 27 Oct 2019 20:53:09 +0100 Subject: mt76: dma: fix buffer unmap with non-linear skbs mt76 dma layer is supposed to unmap skb data buffers while keeping txwi mapped on hw dma ring. At the moment mt76 wrongly unmaps txwi or does not unmap data fragments in even positions for non-linear skbs. This issue may result in hw hangs with A-MSDU if the system relies on IOMMU or SWIOTLB. Fix this behaviour by properly unmapping data fragments on non-linear skbs. Fixes: 17f1de56df05 ("mt76: add common code shared between multiple chipsets") Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/dma.c | 6 ++++-- drivers/net/wireless/mediatek/mt76/mt76.h | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index c747eb24581c..8f69d00bd940 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -53,8 +53,10 @@ mt76_dma_add_buf(struct mt76_dev *dev, struct mt76_queue *q, u32 ctrl; int i, idx = -1; - if (txwi) + if (txwi) { q->entry[q->head].txwi = DMA_DUMMY_DATA; + q->entry[q->head].skip_buf0 = true; + } for (i = 0; i < nbufs; i += 2, buf += 2) { u32 buf0 = buf[0].addr, buf1 = 0; @@ -97,7 +99,7 @@ mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx, __le32 __ctrl = READ_ONCE(q->desc[idx].ctrl); u32 ctrl = le32_to_cpu(__ctrl); - if (!e->txwi || !e->skb) { + if (!e->skip_buf0) { __le32 addr = READ_ONCE(q->desc[idx].buf0); u32 len = FIELD_GET(MT_DMA_CTL_SD_LEN0, ctrl); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h 
index dc468ed9434a..8aec7ccf2d79 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -93,8 +93,9 @@ struct mt76_queue_entry { struct urb *urb; }; enum mt76_txq_id qid; - bool schedule; - bool done; + bool skip_buf0:1; + bool schedule:1; + bool done:1; }; struct mt76_queue_regs { -- cgit v1.2.3 From 3d206e6899a07fe853f703f7e68f84b48b919129 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 29 Oct 2019 17:47:20 +0200 Subject: iwlwifi: fw api: support new API for scan config cmd The API was reduced to include only knowledge currently needed by the FW scan logic, the rest is legacy. Support the new, reduced version. Using the old API with newer firmwares (starting from iwlwifi-*-50.ucode, which implements and requires the new API version) causes an assertion failure similar to this one: [ 2.854505] iwlwifi 0000:00:14.3: 0x20000038 | BAD_COMMAND Signed-off-by: Ayala Beker Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/fw/api/scan.h | 22 +++++++++++-- drivers/net/wireless/intel/iwlwifi/fw/file.h | 3 ++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 6 ++++ drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 40 +++++++++++++++++++----- 4 files changed, 61 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h index 39c64850cb6f..c0750ced5ac2 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h @@ -520,7 +520,7 @@ struct iwl_scan_dwell { } __packed; /** - * struct iwl_scan_config + * struct iwl_scan_config_v1 * @flags: enum scan_config_flags * @tx_chains: valid_tx antenna - ANT_* definitions * @rx_chains: valid_rx antenna - ANT_* definitions @@ -552,7 +552,7 @@ struct iwl_scan_config_v1 { #define SCAN_LB_LMAC_IDX 0 #define SCAN_HB_LMAC_IDX 1 -struct iwl_scan_config { +struct iwl_scan_config_v2 { __le32 flags; __le32 tx_chains; 
__le32 rx_chains; @@ -564,6 +564,24 @@ struct iwl_scan_config { u8 bcast_sta_id; u8 channel_flags; u8 channel_array[]; +} __packed; /* SCAN_CONFIG_DB_CMD_API_S_2 */ + +/** + * struct iwl_scan_config + * @enable_cam_mode: whether to enable CAM mode. + * @enable_promiscouos_mode: whether to enable promiscouos mode + * @bcast_sta_id: the index of the station in the fw + * @reserved: reserved + * @tx_chains: valid_tx antenna - ANT_* definitions + * @rx_chains: valid_rx antenna - ANT_* definitions + */ +struct iwl_scan_config { + u8 enable_cam_mode; + u8 enable_promiscouos_mode; + u8 bcast_sta_id; + u8 reserved; + __le32 tx_chains; + __le32 rx_chains; } __packed; /* SCAN_CONFIG_DB_CMD_API_S_3 */ /** diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h index 423cc0cf8e78..0d5bc4ce5c07 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/file.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h @@ -288,6 +288,8 @@ typedef unsigned int __bitwise iwl_ucode_tlv_api_t; * STA_CONTEXT_DOT11AX_API_S * @IWL_UCODE_TLV_CAPA_SAR_TABLE_VER: This ucode supports different sar * version tables. + * @IWL_UCODE_TLV_API_REDUCED_SCAN_CONFIG: This ucode supports v3 of + * SCAN_CONFIG_DB_CMD_API_S. 
* * @NUM_IWL_UCODE_TLV_API: number of bits used */ @@ -321,6 +323,7 @@ enum iwl_ucode_tlv_api { IWL_UCODE_TLV_API_WOWLAN_TCP_SYN_WAKE = (__force iwl_ucode_tlv_api_t)53, IWL_UCODE_TLV_API_FTM_RTT_ACCURACY = (__force iwl_ucode_tlv_api_t)54, IWL_UCODE_TLV_API_SAR_TABLE_VER = (__force iwl_ucode_tlv_api_t)55, + IWL_UCODE_TLV_API_REDUCED_SCAN_CONFIG = (__force iwl_ucode_tlv_api_t)56, IWL_UCODE_TLV_API_ADWELL_HB_DEF_N_AP = (__force iwl_ucode_tlv_api_t)57, IWL_UCODE_TLV_API_SCAN_EXT_CHAN_VER = (__force iwl_ucode_tlv_api_t)58, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 843d00bf2bd5..5ca50f39a023 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1405,6 +1405,12 @@ static inline bool iwl_mvm_is_scan_ext_chan_supported(struct iwl_mvm *mvm) IWL_UCODE_TLV_API_SCAN_EXT_CHAN_VER); } +static inline bool iwl_mvm_is_reduced_config_scan_supported(struct iwl_mvm *mvm) +{ + return fw_has_api(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_API_REDUCED_SCAN_CONFIG); +} + static inline bool iwl_mvm_has_new_rx_stats_api(struct iwl_mvm *mvm) { return fw_has_api(&mvm->fw->ucode_capa, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index f6b3045badbd..fcafa22ec6ce 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -1137,11 +1137,11 @@ static void iwl_mvm_fill_scan_config_v1(struct iwl_mvm *mvm, void *config, iwl_mvm_fill_channels(mvm, cfg->channel_array, max_channels); } -static void iwl_mvm_fill_scan_config(struct iwl_mvm *mvm, void *config, - u32 flags, u8 channel_flags, - u32 max_channels) +static void iwl_mvm_fill_scan_config_v2(struct iwl_mvm *mvm, void *config, + u32 flags, u8 channel_flags, + u32 max_channels) { - struct iwl_scan_config *cfg = config; + struct iwl_scan_config_v2 *cfg = config; cfg->flags = cpu_to_le32(flags); cfg->tx_chains = 
cpu_to_le32(iwl_mvm_get_valid_tx_ant(mvm)); @@ -1185,7 +1185,7 @@ static void iwl_mvm_fill_scan_config(struct iwl_mvm *mvm, void *config, iwl_mvm_fill_channels(mvm, cfg->channel_array, max_channels); } -int iwl_mvm_config_scan(struct iwl_mvm *mvm) +static int iwl_mvm_legacy_config_scan(struct iwl_mvm *mvm) { void *cfg; int ret, cmd_size; @@ -1217,7 +1217,7 @@ int iwl_mvm_config_scan(struct iwl_mvm *mvm) } if (iwl_mvm_cdb_scan_api(mvm)) - cmd_size = sizeof(struct iwl_scan_config); + cmd_size = sizeof(struct iwl_scan_config_v2); else cmd_size = sizeof(struct iwl_scan_config_v1); cmd_size += num_channels; @@ -1254,8 +1254,8 @@ int iwl_mvm_config_scan(struct iwl_mvm *mvm) flags |= (iwl_mvm_is_scan_fragmented(hb_type)) ? SCAN_CONFIG_FLAG_SET_LMAC2_FRAGMENTED : SCAN_CONFIG_FLAG_CLEAR_LMAC2_FRAGMENTED; - iwl_mvm_fill_scan_config(mvm, cfg, flags, channel_flags, - num_channels); + iwl_mvm_fill_scan_config_v2(mvm, cfg, flags, channel_flags, + num_channels); } else { iwl_mvm_fill_scan_config_v1(mvm, cfg, flags, channel_flags, num_channels); @@ -1277,6 +1277,30 @@ int iwl_mvm_config_scan(struct iwl_mvm *mvm) return ret; } +int iwl_mvm_config_scan(struct iwl_mvm *mvm) +{ + struct iwl_scan_config cfg; + struct iwl_host_cmd cmd = { + .id = iwl_cmd_id(SCAN_CFG_CMD, IWL_ALWAYS_LONG_GROUP, 0), + .len[0] = sizeof(cfg), + .data[0] = &cfg, + .dataflags[0] = IWL_HCMD_DFL_NOCOPY, + }; + + if (!iwl_mvm_is_reduced_config_scan_supported(mvm)) + return iwl_mvm_legacy_config_scan(mvm); + + memset(&cfg, 0, sizeof(cfg)); + + cfg.bcast_sta_id = mvm->aux_sta.sta_id; + cfg.tx_chains = cpu_to_le32(iwl_mvm_get_valid_tx_ant(mvm)); + cfg.rx_chains = cpu_to_le32(iwl_mvm_scan_rx_ant(mvm)); + + IWL_DEBUG_SCAN(mvm, "Sending UMAC scan config\n"); + + return iwl_mvm_send_cmd(mvm, &cmd); +} + static int iwl_mvm_scan_uid_by_status(struct iwl_mvm *mvm, int status) { int i; -- cgit v1.2.3 From e5574f61e9d8274c49e9a5d943abde8e938d57e1 Mon Sep 17 00:00:00 2001 From: chen gong Date: Wed, 23 Oct 2019 13:54:32 +0800 
Subject: drm/amdgpu: Fix SDMA hang when performing VKexample test VKexample test hang during Occlusion/SDMA/Varia runs. Clear XNACK_WATERMK in reg SDMA0_UTCL1_WATERMK to fix this issue. Signed-off-by: chen gong Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 78452cf0115d..4554e72c8378 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -254,6 +254,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000) }; static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, -- cgit v1.2.3 From 9bdf63d3579e36942f4b91d3558a90da8116bb40 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Tue, 22 Oct 2019 19:22:11 +0200 Subject: drm/amdgpu/sdma5: do not execute 0-sized IBs (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This seems to help with https://bugs.freedesktop.org/show_bug.cgi?id=111481. 
v2: insert a NOP instead of skipping all 0-sized IBs to avoid breaking older hw Signed-off-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 241a4e57cf4a..354e6200ca9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -309,6 +309,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo); job->vm_needs_flush = true; + job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop; amdgpu_ring_pad_ib(ring, &job->ibs[0]); r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence); -- cgit v1.2.3 From 40ba9796983bfd202619ffb7518d0fbdc9547d53 Mon Sep 17 00:00:00 2001 From: Zhan liu Date: Tue, 22 Oct 2019 10:50:21 -0400 Subject: drm/amd/display: Change Navi14's DWB flag to 1 [Why] DWB (Display Writeback) flag needs to be enabled as 1, or system will throw out a few warnings when creating dcn20 resource pool. Also, Navi14's dwb setting needs to match Navi10's, which has already been set to 1. [How] Change value of num_dwb from 0 to 1. 
Signed-off-by: Zhan Liu Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 5a2763daff4d..dfb208285a9c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -814,7 +814,7 @@ static const struct resource_caps res_cap_nv14 = { .num_audio = 6, .num_stream_encoder = 5, .num_pll = 5, - .num_dwb = 0, + .num_dwb = 1, .num_ddc = 5, }; -- cgit v1.2.3 From f52ebe1f888dfae68d7cffabf5ac898f8cb64fb3 Mon Sep 17 00:00:00 2001 From: "Tianci.Yin" Date: Thu, 24 Oct 2019 18:03:17 +0800 Subject: drm/amdgpu/gfx10: update gfx golden settings update registers: mmCGTT_SPI_CLK_CTRL Reviewed-by: Feifei Xu Signed-off-by: Tianci.Yin Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 957811b73672..63f2a340ce27 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -93,7 +93,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), -- cgit v1.2.3 From 3dde767f14dcdbe8231645cac01051cebb4feb57 Mon Sep 
17 00:00:00 2001 From: "Tianci.Yin" Date: Thu, 24 Oct 2019 18:04:52 +0800 Subject: drm/amdgpu/gfx10: update gfx golden settings for navi14 update registers: mmCGTT_SPI_CLK_CTRL Reviewed-by: Feifei Xu Signed-off-by: Tianci.Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 63f2a340ce27..d846ba9db1c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -140,7 +140,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), -- cgit v1.2.3 From 47661f6dad42e1241cdef82c5e06cfb7027a3f59 Mon Sep 17 00:00:00 2001 From: "Tianci.Yin" Date: Thu, 24 Oct 2019 18:06:06 +0800 Subject: drm/amdgpu/gfx10: update gfx golden settings for navi12 update registers: mmCGTT_SPI_CLK_CTRL Reviewed-by: Feifei Xu Signed-off-by: Tianci.Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d846ba9db1c4..8dfc775626a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -179,7 +179,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0xc0000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x0d000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), -- cgit v1.2.3 From 167bf96014a095753053595f3224fcdeb49ac3c8 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Thu, 24 Oct 2019 15:39:06 -0400 Subject: drm/sched: Set error to s_fence if HW job submission failed. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: When run_job fails and the HW fence returned is NULL we still signal the s_fence to avoid hangs but the user has no way of knowing if the actual HW job was run and finished. Fix: Allow .run_job implementations to return ERR_PTR in the fence pointer returned and then set this error for s_fence->finished fence so whoever waits on this fence can inspect the signaled fence for an error.
Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/sched_main.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 9a0ee74d82dc..f39b97ed4ade 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -479,6 +479,7 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched) struct drm_sched_job *s_job, *tmp; uint64_t guilty_context; bool found_guilty = false; + struct dma_fence *fence; list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { struct drm_sched_fence *s_fence = s_job->s_fence; @@ -492,7 +493,16 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched) dma_fence_set_error(&s_fence->finished, -ECANCELED); dma_fence_put(s_job->s_fence->parent); - s_job->s_fence->parent = sched->ops->run_job(s_job); + fence = sched->ops->run_job(s_job); + + if (IS_ERR_OR_NULL(fence)) { + s_job->s_fence->parent = NULL; + dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); + } else { + s_job->s_fence->parent = fence; + } + + } } EXPORT_SYMBOL(drm_sched_resubmit_jobs); @@ -720,7 +730,7 @@ static int drm_sched_main(void *param) fence = sched->ops->run_job(sched_job); drm_sched_fence_scheduled(s_fence); - if (fence) { + if (!IS_ERR_OR_NULL(fence)) { s_fence->parent = dma_fence_get(fence); r = dma_fence_add_callback(fence, &sched_job->cb, drm_sched_process_job); @@ -730,8 +740,11 @@ static int drm_sched_main(void *param) DRM_ERROR("fence add callback failed (%d)\n", r); dma_fence_put(fence); - } else + } else { + + dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); drm_sched_process_job(NULL, &sched_job->cb); + } wake_up(&sched->job_scheduled); } -- cgit v1.2.3 From 57c0f58e9f562089de5f0b60da103677d232374c Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Thu, 24 Oct 2019 15:44:10 -0400 Subject: drm/amdgpu: 
If amdgpu_ib_schedule fails return back the error. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use ERR_PTR to return the error that happened during amdgpu_ib_schedule. Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 9d76e0923a5a..96b2a31ccfed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -218,7 +218,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched); struct dma_fence *fence = NULL, *finished; struct amdgpu_job *job; - int r; + int r = 0; job = to_amdgpu_job(sched_job); finished = &job->base.s_fence->finished; @@ -243,6 +243,8 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) job->fence = dma_fence_get(fence); amdgpu_job_free_resources(job); + + fence = r ? ERR_PTR(r) : fence; return fence; } -- cgit v1.2.3 From 8775e89fa7121535d2da738c95167b8c65aa6e90 Mon Sep 17 00:00:00 2001 From: Jun Lei Date: Thu, 3 Oct 2019 15:09:53 -0400 Subject: drm/amd/display: do not synchronize "drr" displays [why] A display that supports DRR can never really be considered "synchronized" with any other display because we can dynamically enable DRR (i.e. without modeset). This will cause their relative CRTC positions to drift and lose sync.
This will disrupt features such as MCLK switching that assume and depend on their permanent alignment (which can only change with a modeset). [how] Check ignore_msa_timing_param in each stream when determining synchronizability; this flag is effectively implemented as "supports drr". Signed-off-by: Jun Lei Reviewed-by: Yongqiang Sun Acked-by: Anthony Koo Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 8f70295179ff..f25ac17f47fa 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -404,6 +404,9 @@ bool resource_are_streams_timing_synchronizable( if (stream1->view_format != stream2->view_format) return false; + if (stream1->ignore_msa_timing_param || stream2->ignore_msa_timing_param) + return false; + return true; } static bool is_dp_and_hdmi_sharable( @@ -1540,6 +1543,9 @@ bool dc_is_stream_unchanged( if (!are_stream_backends_same(old_stream, stream)) return false; + if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param) + return false; + return true; } -- cgit v1.2.3 From ceba1a0128a68b0a045bbd0c020994e5c5c737a8 Mon Sep 17 00:00:00 2001 From: Aidan Yang Date: Wed, 2 Oct 2019 10:47:31 -0400 Subject: drm/amd/display: Allow inverted gamma [why] There's a use case for inverted gamma and it's been confirmed that negative slopes are ok.
[how] Remove code for blocking non-monotonically increasing gamma Signed-off-by: Aidan Yang Reviewed-by: Krunoslav Kovac Acked-by: Leo Li Acked-by: Reza Amini Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c index 01c7e30b9ce1..bbd6e01b3eca 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c @@ -393,6 +393,10 @@ bool cm_helper_translate_curve_to_hw_format( rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; + rgb_resulted[hw_points].red = rgb_resulted[hw_points - 1].red; + rgb_resulted[hw_points].green = rgb_resulted[hw_points - 1].green; + rgb_resulted[hw_points].blue = rgb_resulted[hw_points - 1].blue; + // All 3 color channels have same x corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2), dc_fixpt_from_int(region_start)); @@ -464,13 +468,6 @@ bool cm_helper_translate_curve_to_hw_format( i = 1; while (i != hw_points + 1) { - if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) - rgb_plus_1->red = rgb->red; - if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) - rgb_plus_1->green = rgb->green; - if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) - rgb_plus_1->blue = rgb->blue; - rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red); rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue); @@ -562,6 +559,10 @@ bool cm_helper_translate_curve_to_degamma_hw_format( rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; + rgb_resulted[hw_points].red = rgb_resulted[hw_points - 1].red; + rgb_resulted[hw_points].green = 
rgb_resulted[hw_points - 1].green; + rgb_resulted[hw_points].blue = rgb_resulted[hw_points - 1].blue; + corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2), dc_fixpt_from_int(region_start)); corner_points[0].green.x = corner_points[0].red.x; @@ -624,13 +625,6 @@ bool cm_helper_translate_curve_to_degamma_hw_format( i = 1; while (i != hw_points + 1) { - if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) - rgb_plus_1->red = rgb->red; - if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) - rgb_plus_1->green = rgb->green; - if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) - rgb_plus_1->blue = rgb->blue; - rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red); rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue); -- cgit v1.2.3 From 7c37d399c2b84d4b79de4d512a38373f1d71ab90 Mon Sep 17 00:00:00 2001 From: Jun Lei Date: Thu, 19 Sep 2019 17:43:45 -0400 Subject: drm/amd/display: add 50us buffer as WA for pstate switch in active Signed-off-by: Jun Lei Reviewed-by: Aric Cyr Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c index 649883777f62..6c6c486b774a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c @@ -2577,7 +2577,8 @@ static void dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPer mode_lib->vba.MinActiveDRAMClockChangeMargin + mode_lib->vba.DRAMClockChangeLatency; - if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) { + if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 50) { + mode_lib->vba.DRAMClockChangeWatermark += 25; mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive; } else { if 
(mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { -- cgit v1.2.3 From bc2fde42e2418808dbfc04de1a6da91d7d31cf1a Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Thu, 3 Oct 2019 11:54:15 -0400 Subject: drm/amd/display: Passive DP->HDMI dongle detection fix [WHY] i2c_read is called to differentiate passive DP->HDMI and DP->DVI-D dongles The call is expected to fail in DVI-D case but pass in HDMI case Some HDMI dongles have a chance to fail as well, causing misdetection as DVI-D [HOW] Retry i2c_read to ensure failed result is valid Signed-off-by: Michael Strauss Reviewed-by: Tony Cheng Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c | 24 +++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c index 505967b48e14..51991bf26a93 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c @@ -374,6 +374,7 @@ void dal_ddc_service_i2c_query_dp_dual_mode_adaptor( enum display_dongle_type *dongle = &sink_cap->dongle_type; uint8_t type2_dongle_buf[DP_ADAPTOR_TYPE2_SIZE]; bool is_type2_dongle = false; + int retry_count = 2; struct dp_hdmi_dongle_signature_data *dongle_signature; /* Assume we have no valid DP passive dongle connected */ @@ -386,13 +387,24 @@ void dal_ddc_service_i2c_query_dp_dual_mode_adaptor( DP_HDMI_DONGLE_ADDRESS, type2_dongle_buf, sizeof(type2_dongle_buf))) { - *dongle = DISPLAY_DONGLE_DP_DVI_DONGLE; - sink_cap->max_hdmi_pixel_clock = DP_ADAPTOR_DVI_MAX_TMDS_CLK; + /* Passive HDMI dongles can sometimes fail here without retrying*/ + while (retry_count > 0) { + if (i2c_read(ddc, + DP_HDMI_DONGLE_ADDRESS, + type2_dongle_buf, + sizeof(type2_dongle_buf))) + break; + retry_count--; + } + if (retry_count == 0) { + *dongle = DISPLAY_DONGLE_DP_DVI_DONGLE; + sink_cap->max_hdmi_pixel_clock = 
DP_ADAPTOR_DVI_MAX_TMDS_CLK; - CONN_DATA_DETECT(ddc->link, type2_dongle_buf, sizeof(type2_dongle_buf), - "DP-DVI passive dongle %dMhz: ", - DP_ADAPTOR_DVI_MAX_TMDS_CLK / 1000); - return; + CONN_DATA_DETECT(ddc->link, type2_dongle_buf, sizeof(type2_dongle_buf), + "DP-DVI passive dongle %dMhz: ", + DP_ADAPTOR_DVI_MAX_TMDS_CLK / 1000); + return; + } } /* Check if Type 2 dongle.*/ -- cgit v1.2.3 From 385857adb8154563840e5b0f200254126618f464 Mon Sep 17 00:00:00 2001 From: Zhan liu Date: Thu, 17 Oct 2019 14:55:56 -0400 Subject: drm/amd/display: setting the DIG_MODE to the correct value. [Why] This patch is for fixing Navi14 HDMI display pink screen issue. [How] Call stream->link->link_enc->funcs->setup twice. This is setting the DIG_MODE to the correct value after having been overridden by the call to transmitter control. Signed-off-by: Zhan Liu Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index ca20b150afcc..9c58670d5414 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2767,6 +2767,15 @@ void core_link_enable_stream( CONTROLLER_DP_TEST_PATTERN_VIDEOMODE, COLOR_DEPTH_UNDEFINED); + /* This second call is needed to reconfigure the DIG + * as a workaround for the incorrect value being applied + * from transmitter control. 
+ */ + if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) + stream->link->link_enc->funcs->setup( + stream->link->link_enc, + pipe_ctx->stream->signal); + #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT if (pipe_ctx->stream->timing.flags.DSC) { if (dc_is_dp_signal(pipe_ctx->stream->signal) || -- cgit v1.2.3 From 364593f3ee5fdefc6efd89475e1804c928b4e6ba Mon Sep 17 00:00:00 2001 From: zhongshiqi Date: Wed, 23 Oct 2019 16:32:23 +0800 Subject: dc.c:use kzalloc without test dc.c:583:null check is needed after using kzalloc function Reviewed-by: Harry Wentland Signed-off-by: zhongshiqi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 5d1adeda4d90..4b8819c27fcd 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -580,6 +580,10 @@ static bool construct(struct dc *dc, #ifdef CONFIG_DRM_AMD_DC_DCN2_0 // Allocate memory for the vm_helper dc->vm_helper = kzalloc(sizeof(struct vm_helper), GFP_KERNEL); + if (!dc->vm_helper) { + dm_error("%s: failed to create dc->vm_helper\n", __func__); + goto fail; + } #endif memcpy(&dc->bb_overrides, &init_params->bb_overrides, sizeof(dc->bb_overrides)); -- cgit v1.2.3 From e6f4e274c1e52d1f0bfe293fb44ddf59de6c0374 Mon Sep 17 00:00:00 2001 From: Pelle van Gils Date: Thu, 24 Oct 2019 16:04:31 +0200 Subject: drm/amdgpu/powerplay/vega10: allow undervolting in p7 The vega10_odn_update_soc_table() function does not allow the SCLK dependent voltage to be set for power-state 7 to a value below the default in pptable. Change the for-loop condition to allow undervolting in the highest state. 
Bug: https://bugzilla.kernel.org/show_bug.cgi?id=205277 Signed-off-by: Pelle van Gils Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index d08493b67b67..beacfffbdc3e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -5098,9 +5098,7 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr, if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) { podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk; - for (i = 0; i < podn_vdd_dep->count - 1; i++) - od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc; - if (od_vddc_lookup_table->entries[i].us_vdd < podn_vdd_dep->entries[i].vddc) + for (i = 0; i < podn_vdd_dep->count; i++) od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc; } else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) { podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk; -- cgit v1.2.3 From 30ef5c7eaba0ddafc6c23eca65ebe52169dfcc60 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 29 Oct 2019 17:14:15 -0400 Subject: drm/amdgpu/gmc10: properly set BANK_SELECT and FRAGMENT_SIZE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These were not aligned for optimal performance for GPUVM. 
Acked-by: Christian König Reviewed-by: Tianci Yin Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 9 +++++++++ drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 8b789f750b72..db10640a3b2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -151,6 +151,15 @@ static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, tmp); tmp = mmGCVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, tmp); tmp = mmGCVM_L2_CNTL4_DEFAULT; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 3542c203c3c8..b39bea6f54e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -137,6 +137,15 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2, tmp); tmp = mmMMVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, tmp); tmp = mmMMVM_L2_CNTL4_DEFAULT; -- cgit v1.2.3 From 722608433c945c52b9ccb649c716a6c6c9012ce2 Mon Sep 17 00:00:00 2001 From: Kyle Mahlkuch Date: Fri, 25 Oct 2019 15:40:50 -0500 Subject: 
drm/radeon: Fix EEH during kexec During kexec some adapters hit an EEH since they are not properly shut down in the radeon_pci_shutdown() function. Adding radeon_suspend_kms() fixes this issue. Enabled only on PPC because this patch causes issues on some other boards. Signed-off-by: Kyle Mahlkuch Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_drv.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 9e55076578c6..4528f4dc0b2d 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -379,11 +379,25 @@ radeon_pci_remove(struct pci_dev *pdev) static void radeon_pci_shutdown(struct pci_dev *pdev) { +#ifdef CONFIG_PPC64 + struct drm_device *ddev = pci_get_drvdata(pdev); +#endif + /* if we are running in a VM, make sure the device * torn down properly on reboot/shutdown */ if (radeon_device_is_virtual()) radeon_pci_remove(pdev); + +#ifdef CONFIG_PPC64 + /* Some adapters need to be suspended before a + * shutdown occurs in order to prevent an error + * during kexec. + * Make this power specific because it breaks + * some non-power boards. + */ + radeon_suspend_kms(ddev, true, true, false); +#endif } static int radeon_pmops_suspend(struct device *dev) -- cgit v1.2.3 From c868868f6b6a5272350781f9a19b3a5ba1c00b02 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 16 Oct 2019 16:02:07 -0700 Subject: drm/amdgpu: fix stack alignment ABI mismatch for Clang The x86 kernel is compiled with an 8B stack alignment via `-mpreferred-stack-boundary=3` for GCC since 3.6-rc1 via commit d9b0cde91c60 ("x86-64, gcc: Use -mpreferred-stack-boundary=3 if supported") or `-mstack-alignment=8` for Clang. Parts of the AMDGPU driver are compiled with 16B stack alignment. Generally, the stack alignment is part of the ABI.
Linking together two different translation units with differing stack alignment is dangerous, particularly when the translation unit with the smaller stack alignment makes calls into the translation unit with the larger stack alignment. While 8B aligned stacks are sometimes also 16B aligned, they are not always. Multiple users have reported General Protection Faults (GPF) when using the AMDGPU driver compiled with Clang. Clang is placing objects in stack slots assuming the stack is 16B aligned, and selecting instructions that require 16B aligned memory operands. At runtime, syscall handlers with 8B aligned stack call into code that assumes 16B stack alignment. When the stack is a multiple of 8B but not 16B, these instructions result in a GPF. Remove the code that added compatibility between the differing compiler flags, as it will result in runtime GPFs when built with Clang. Cleanups for GCC will be sent in later patches in the series. Link: https://github.com/ClangBuiltLinux/linux/issues/735 Debugged-by: Yuxuan Shui Reported-by: Shirish S Reported-by: Yuxuan Shui Suggested-by: Andrew Cooper Signed-off-by: Nick Desaulniers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/Makefile | 10 ++++------ drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 10 ++++------ drivers/gpu/drm/amd/display/dc/dcn21/Makefile | 10 ++++------ drivers/gpu/drm/amd/display/dc/dml/Makefile | 10 ++++------ drivers/gpu/drm/amd/display/dc/dsc/Makefile | 10 ++++------ 5 files changed, 20 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index 985633c08a26..4b1a8a08a5de 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -24,13 +24,11 @@ # It calculates Bandwidth and Watermarks values for HW programming # -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call 
cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +calcs_ccflags := -mhard-float -msse -calcs_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +calcs_ccflags += -mpreferred-stack-boundary=4 +endif ifdef CONFIG_CC_IS_CLANG calcs_ccflags += -msse2 diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index ddb8d5649e79..5fe3eb80075d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -10,13 +10,11 @@ ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT DCN20 += dcn20_dsc.o endif -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse -CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -mpreferred-stack-boundary=4 +endif ifdef CONFIG_CC_IS_CLANG CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -msse2 diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index ef673bffc241..7057e20748b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -3,13 +3,11 @@ DCN21 = dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse -CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -mpreferred-stack-boundary=4 +endif 
ifdef CONFIG_CC_IS_CLANG CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -msse2 diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 5b2a65b42403..1bd6e307b7f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -24,13 +24,11 @@ # It provides the general basic services required by other DAL # subcomponents. -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +dml_ccflags := -mhard-float -msse -dml_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +dml_ccflags += -mpreferred-stack-boundary=4 +endif ifdef CONFIG_CC_IS_CLANG dml_ccflags += -msse2 diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index b456cd23c6fa..932c3055230e 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -1,13 +1,11 @@ # # Makefile for the 'dsc' sub-component of DAL. 
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +dsc_ccflags := -mhard-float -msse -dsc_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +dsc_ccflags += -mpreferred-stack-boundary=4 +endif ifdef CONFIG_CC_IS_CLANG dsc_ccflags += -msse2 -- cgit v1.2.3 From 00db297106e81770e7c4319014a67896053b5a22 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 16 Oct 2019 16:02:08 -0700 Subject: drm/amdgpu: fix stack alignment ABI mismatch for GCC 7.1+ GCC earlier than 7.1 errors when compiling code that makes use of `double`s and sets a stack alignment outside of the range of [2^4-2^12]: $ cat foo.c double foo(double x, double y) { return x + y; } $ gcc-4.9 -mpreferred-stack-boundary=3 foo.c error: -mpreferred-stack-boundary=3 is not between 4 and 12 This is likely why the AMDGPU driver was ever compiled with a different stack alignment (and thus different ABI) than the rest of the x86 kernel. The kernel uses 8B stack alignment, while the driver was using 16B stack alignment in a few places. Since GCC 7.1+ doesn't error, fix the ABI mismatch for users of newer versions of GCC. There was discussion about whether to mark the driver broken or not for users of GCC earlier than 7.1, but since the driver currently is working, don't explicitly break the driver for them here. Relying on differing stack alignment is unspecified behavior, and brittle, and may break in the future. This patch is no functional change for GCC users earlier than 7.1. It's been compile tested on GCC 4.9 and 8.3 to check the correct flags. It should be boot tested when built with GCC 7.1+. -mincoming-stack-boundary= or -mstackrealign may help keep this code building for pre-GCC 7.1 users. 
The version check for GCC is broken into two conditionals, both because cc-ifversion is currently GCC specific, and it simplifies a subsequent patch. Signed-off-by: Nick Desaulniers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/Makefile | 9 +++++++++ drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 9 +++++++++ drivers/gpu/drm/amd/display/dc/dcn21/Makefile | 9 +++++++++ drivers/gpu/drm/amd/display/dc/dml/Makefile | 9 +++++++++ drivers/gpu/drm/amd/display/dc/dsc/Makefile | 9 +++++++++ 5 files changed, 45 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index 4b1a8a08a5de..a1af55a86508 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -27,6 +27,15 @@ calcs_ccflags := -mhard-float -msse ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif + +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). calcs_ccflags += -mpreferred-stack-boundary=4 endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index 5fe3eb80075d..cb0ac131f74a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -13,6 +13,15 @@ endif CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif + +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). 
CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -mpreferred-stack-boundary=4 endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index 7057e20748b9..f92320ddd27f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -6,6 +6,15 @@ DCN21 = dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif + +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -mpreferred-stack-boundary=4 endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 1bd6e307b7f8..ef1bdd20b425 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -27,6 +27,15 @@ dml_ccflags := -mhard-float -msse ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif + +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). dml_ccflags += -mpreferred-stack-boundary=4 endif diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index 932c3055230e..3f7840828a9f 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -4,6 +4,15 @@ dsc_ccflags := -mhard-float -msse ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif + +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. 
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). dsc_ccflags += -mpreferred-stack-boundary=4 endif -- cgit v1.2.3 From e8a170ff9a3576730e43c0dbdd27b7cd3dc56848 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 16 Oct 2019 16:02:09 -0700 Subject: drm/amdgpu: enable -msse2 for GCC 7.1+ users A final attempt at enabling sse2 for GCC users. Originally attempted in: commit 10117450735c ("drm/amd/display: add -msse2 to prevent Clang from emitting libcalls to undefined SW FP routines") Reverted due to "reported instability" in: commit 193392ed9f69 ("Revert "drm/amd/display: add -msse2 to prevent Clang from emitting libcalls to undefined SW FP routines"") Re-added just for Clang in: commit 0f0727d971f6 ("drm/amd/display: readd -msse2 to prevent Clang from emitting libcalls to undefined SW FP routines") The original report didn't have enough information to know if the GPF was due to misalignment, but I suspect that it was. (The missing information was the disassembly of the function at the bottom of the trace, to see if the instruction pointer pointed to an instruction with 16B alignment memory operand requirements. The stack trace does show the stack was only 8B but not 16B aligned though, which makes this a strong possibility). Now that the stack misalignment issue has been fixed for users of GCC 7.1+, reattempt adding -msse2. This matches Clang. It will likely never be safe to enable this for pre-GCC 7.1 AND use a 16B aligned stack in these translation units. This is only a functional change for GCC 7.1+ users, and should be boot tested.
Link: https://bugs.freedesktop.org/show_bug.cgi?id=109487 Signed-off-by: Nick Desaulniers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/Makefile | 4 +--- drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 4 +--- drivers/gpu/drm/amd/display/dc/dcn21/Makefile | 4 +--- drivers/gpu/drm/amd/display/dc/dml/Makefile | 4 +--- drivers/gpu/drm/amd/display/dc/dsc/Makefile | 4 +--- 5 files changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index a1af55a86508..26c6d735cdc7 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -37,9 +37,7 @@ ifdef IS_OLD_GCC # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 # (8B stack alignment). calcs_ccflags += -mpreferred-stack-boundary=4 -endif - -ifdef CONFIG_CC_IS_CLANG +else calcs_ccflags += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index cb0ac131f74a..63f3bddba7da 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -23,9 +23,7 @@ ifdef IS_OLD_GCC # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 # (8B stack alignment). CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -mpreferred-stack-boundary=4 -endif - -ifdef CONFIG_CC_IS_CLANG +else CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index f92320ddd27f..ff50ae71fe27 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -16,9 +16,7 @@ ifdef IS_OLD_GCC # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 # (8B stack alignment). 
CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -mpreferred-stack-boundary=4 -endif - -ifdef CONFIG_CC_IS_CLANG +else CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index ef1bdd20b425..8df251626e22 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -37,9 +37,7 @@ ifdef IS_OLD_GCC # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 # (8B stack alignment). dml_ccflags += -mpreferred-stack-boundary=4 -endif - -ifdef CONFIG_CC_IS_CLANG +else dml_ccflags += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index 3f7840828a9f..970737217e53 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -14,9 +14,7 @@ ifdef IS_OLD_GCC # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 # (8B stack alignment). dsc_ccflags += -mpreferred-stack-boundary=4 -endif - -ifdef CONFIG_CC_IS_CLANG +else dsc_ccflags += -msse2 endif -- cgit v1.2.3 From 875f0706accd6501c3209bb99df8573171fb5d75 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 17 Oct 2019 09:02:19 -0400 Subject: SUNRPC: The TCP back channel mustn't disappear while requests are outstanding If there are TCP back channel requests being processed by the server threads, then we should hold a reference to the transport to ensure it doesn't get freed from underneath us. 
Reported-by: Neil Brown Fixes: 2ea24497a1b3 ("SUNRPC: RPC callbacks may be split across several..") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/backchannel_rqst.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 339e8c077c2d..7eb251372f94 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -307,8 +307,8 @@ void xprt_free_bc_rqst(struct rpc_rqst *req) */ dprintk("RPC: Last session removed req=%p\n", req); xprt_free_allocation(req); - return; } + xprt_put(xprt); } /* @@ -339,7 +339,7 @@ found: spin_unlock(&xprt->bc_pa_lock); if (new) { if (req != new) - xprt_free_bc_rqst(new); + xprt_free_allocation(new); break; } else if (req) break; @@ -368,6 +368,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); dprintk("RPC: add callback request to list\n"); + xprt_get(xprt); spin_lock(&bc_serv->sv_cb_lock); list_add(&req->rq_bc_list, &bc_serv->sv_cb_list); wake_up(&bc_serv->sv_cb_waitq); -- cgit v1.2.3 From 9edb455e6797bb50aa38ef71e62668966065ede8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 17 Oct 2019 09:02:20 -0400 Subject: SUNRPC: The RDMA back channel mustn't disappear while requests are outstanding If there are RDMA back channel requests being processed by the server threads, then we should hold a reference to the transport to ensure it doesn't get freed from underneath us. 
Reported-by: Neil Brown Fixes: 63cae47005af ("xprtrdma: Handle incoming backward direction RPC calls") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 50e075fcdd8f..b458bf53ca69 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -163,6 +163,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) spin_lock(&xprt->bc_pa_lock); list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); spin_unlock(&xprt->bc_pa_lock); + xprt_put(xprt); } static struct rpc_rqst *rpcrdma_bc_rqst_get(struct rpcrdma_xprt *r_xprt) @@ -259,6 +260,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, /* Queue rqst for ULP's callback service */ bc_serv = xprt->bc_serv; + xprt_get(xprt); spin_lock(&bc_serv->sv_cb_lock); list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list); spin_unlock(&bc_serv->sv_cb_lock); -- cgit v1.2.3 From 669996add4c92476e0f8d6b4cd2bb308d1939fd7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 17 Oct 2019 09:02:21 -0400 Subject: SUNRPC: Destroy the back channel when we destroy the host transport When we're destroying the host transport mechanism, we should ensure that we do not leak memory by failing to release any back channel slots that might still exist. 
Reported-by: Neil Brown Reported-by: kbuild test robot Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/bc_xprt.h | 5 +++++ net/sunrpc/backchannel_rqst.c | 2 +- net/sunrpc/xprt.c | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 87d27e13d885..d796058cdff2 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -64,6 +64,11 @@ static inline int xprt_setup_backchannel(struct rpc_xprt *xprt, return 0; } +static inline void xprt_destroy_backchannel(struct rpc_xprt *xprt, + unsigned int max_reqs) +{ +} + static inline bool svc_is_backchannel(const struct svc_rqst *rqstp) { return false; diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 7eb251372f94..195b40c5dae4 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -220,7 +220,7 @@ void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs) goto out; spin_lock_bh(&xprt->bc_pa_lock); - xprt->bc_alloc_max -= max_reqs; + xprt->bc_alloc_max -= min(max_reqs, xprt->bc_alloc_max); list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { dprintk("RPC: req=%p\n", req); list_del(&req->rq_bc_pa_list); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 8a45b3ccc313..41df4c507193 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1942,6 +1942,11 @@ static void xprt_destroy_cb(struct work_struct *work) rpc_destroy_wait_queue(&xprt->sending); rpc_destroy_wait_queue(&xprt->backlog); kfree(xprt->servername); + /* + * Destroy any existing back channel + */ + xprt_destroy_backchannel(xprt, UINT_MAX); + /* * Tear down transport state and free the rpc_xprt */ -- cgit v1.2.3 From dc99da4f31ce48d15684bde7916104064520025c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 30 Oct 2019 08:59:22 +0100 Subject: qed: fix spelling mistake "queuess" -> "queues" There is a spelling mistake in a
DP_NOTICE message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 78f77b712b10..dcb5c917f373 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -2005,7 +2005,7 @@ static void qed_iov_vf_mbx_stop_vport(struct qed_hwfn *p_hwfn, (qed_iov_validate_active_txq(p_hwfn, vf))) { vf->b_malicious = true; DP_NOTICE(p_hwfn, - "VF [%02x] - considered malicious; Unable to stop RX/TX queuess\n", + "VF [%02x] - considered malicious; Unable to stop RX/TX queues\n", vf->abs_vf_id); status = PFVF_STATUS_MALICIOUS; goto out; -- cgit v1.2.3 From c6761cf521f9bffbdcbb619dba665ebf3bcefb1e Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 30 Oct 2019 08:15:12 +0000 Subject: vxlan: fix unexpected failure of vxlan_changelink() After commit 0ce1822c2a08 ("vxlan: add adjacent link to limit depth level"), vxlan_changelink() could fail because of netdev_adjacent_change_prepare(). netdev_adjacent_change_prepare() returns -EEXIST when old lower device and new lower device are same. (old lower device is "dst->remote_dev" and new lower device is "lowerdev") So, before calling it, lowerdev should be NULL if these devices are same. Test command1: ip link add dummy0 type dummy ip link add vxlan0 type vxlan dev dummy0 dstport 4789 vni 1 ip link set vxlan0 type vxlan ttl 5 RTNETLINK answers: File exists Reported-by: Dan Carpenter Fixes: 0ce1822c2a08 ("vxlan: add adjacent link to limit depth level") Signed-off-by: Taehee Yoo Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index ac5c597aa703..8869154fad88 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3967,6 +3967,9 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], if (err) return err; + if (dst->remote_dev == lowerdev) + lowerdev = NULL; + err = netdev_adjacent_change_prepare(dst->remote_dev, lowerdev, dev, extack); if (err) @@ -4008,10 +4011,10 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], mod_timer(&vxlan->age_timer, jiffies); netdev_adjacent_change_commit(dst->remote_dev, lowerdev, dev); - if (lowerdev && lowerdev != dst->remote_dev) + if (lowerdev && lowerdev != dst->remote_dev) { dst->remote_dev = lowerdev; - - netdev_update_lockdep_key(lowerdev); + netdev_update_lockdep_key(lowerdev); + } vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true); return 0; } -- cgit v1.2.3 From c63b0968946b2d72178a92793bcc9439e19b385f Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 30 Oct 2019 01:39:58 -0700 Subject: qed: Optimize execution time for nvm attributes configuration. Current implementation for nvm_attr configuration instructs the management FW to load/unload the nvm-cfg image for each user-provided attribute in the input file. This consumes lot of cycles even for few tens of attributes. This patch updates the implementation to perform load/commit of the config for every 50 attributes. After loading the nvm-image, MFW expects that config should be committed in a predefined timer value (5 sec), hence it's not possible to write large number of attributes in a single load/commit window. Hence performing the commits in chunks. Fixes: 0dabbe1bb3a4 ("qed: Add driver API for flashing the config attributes.") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 2ce70097d018..38f7f40b3a4d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -67,10 +67,9 @@ #define QED_ROCE_QPS (8192) #define QED_ROCE_DPIS (8) #define QED_RDMA_SRQS QED_ROCE_QPS -#define QED_NVM_CFG_SET_FLAGS 0xE -#define QED_NVM_CFG_SET_PF_FLAGS 0x1E #define QED_NVM_CFG_GET_FLAGS 0xA #define QED_NVM_CFG_GET_PF_FLAGS 0x1A +#define QED_NVM_CFG_MAX_ATTRS 50 static char version[] = "QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n"; @@ -2255,6 +2254,7 @@ static int qed_nvm_flash_cfg_write(struct qed_dev *cdev, const u8 **data) { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); u8 entity_id, len, buf[32]; + bool need_nvm_init = true; struct qed_ptt *ptt; u16 cfg_id, count; int rc = 0, i; @@ -2271,8 +2271,10 @@ static int qed_nvm_flash_cfg_write(struct qed_dev *cdev, const u8 **data) DP_VERBOSE(cdev, NETIF_MSG_DRV, "Read config ids: num_attrs = %0d\n", count); - /* NVM CFG ID attributes */ - for (i = 0; i < count; i++) { + /* NVM CFG ID attributes. Start loop index from 1 to avoid additional + * arithmetic operations in the implementation. + */ + for (i = 1; i <= count; i++) { cfg_id = *((u16 *)*data); *data += 2; entity_id = **data; @@ -2282,8 +2284,21 @@ static int qed_nvm_flash_cfg_write(struct qed_dev *cdev, const u8 **data) memcpy(buf, *data, len); *data += len; - flags = entity_id ? 
QED_NVM_CFG_SET_PF_FLAGS : - QED_NVM_CFG_SET_FLAGS; + flags = 0; + if (need_nvm_init) { + flags |= QED_NVM_CFG_OPTION_INIT; + need_nvm_init = false; + } + + /* Commit to flash and free the resources */ + if (!(i % QED_NVM_CFG_MAX_ATTRS) || i == count) { + flags |= QED_NVM_CFG_OPTION_COMMIT | + QED_NVM_CFG_OPTION_FREE; + need_nvm_init = true; + } + + if (entity_id) + flags |= QED_NVM_CFG_OPTION_ENTITY_SEL; DP_VERBOSE(cdev, NETIF_MSG_DRV, "cfg_id = %d entity = %d len = %d\n", cfg_id, -- cgit v1.2.3 From b7265a0df82c1716bf788096217083ed65a8bb14 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 30 Oct 2019 11:04:22 +0200 Subject: mlxsw: core: Unpublish devlink parameters during reload The devlink parameter "acl_region_rehash_interval" is a runtime parameter whose value is stored in a dynamically allocated memory. While reloading the driver, this memory is freed and then allocated again. A use-after-free might happen if during this time frame someone tries to retrieve its value. Since commit 070c63f20f6c ("net: devlink: allow to change namespaces during reload") the use-after-free can be reliably triggered when reloading the driver into a namespace, as after freeing the memory (via reload_down() callback) all the parameters are notified. Fix this by unpublishing and then re-publishing the parameters during reload. Fixes: 98bbf70c1c41 ("mlxsw: spectrum: add "acl_region_rehash_interval" devlink param") Fixes: 7c62cfb8c574 ("devlink: publish params only after driver init is done") Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 14dcc786926d..4421ab22182f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1186,7 +1186,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_thermal_init; - if (mlxsw_driver->params_register && !reload) + if (mlxsw_driver->params_register) devlink_params_publish(devlink); return 0; @@ -1259,7 +1259,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, return; } - if (mlxsw_core->driver->params_unregister && !reload) + if (mlxsw_core->driver->params_unregister) devlink_params_unpublish(devlink); mlxsw_thermal_fini(mlxsw_core->thermal); mlxsw_hwmon_fini(mlxsw_core->hwmon); -- cgit v1.2.3 From 7541c87c9b7a7e07c84481f37f2c19063b44469b Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 28 Oct 2019 13:29:02 +0100 Subject: bpf: Allow narrow loads of bpf_sysctl fields with offset > 0 "ctx:file_pos sysctl:read read ok narrow" works on s390 by accident: it reads the wrong byte, which happens to have the expected value of 0. Improve the test by seeking to the 4th byte and expecting 4 instead of 0. This makes the latent problem apparent: the test attempts to read the first byte of bpf_sysctl.file_pos, assuming this is the least-significant byte, which is not the case on big-endian machines: a non-zero offset is needed. The point of the test is to verify narrow loads, so we cannot cheat our way out by simply using BPF_W. The existence of the test means that such loads have to be supported, most likely because llvm can generate them. Fix the test by adding a big-endian variant, which uses an offset to access the least-significant byte of bpf_sysctl.file_pos. 
This reveals the final problem: verifier rejects accesses to bpf_sysctl fields with offset > 0. Such accesses are already allowed for a wide range of structs: __sk_buff, bpf_sock_addr and sk_msg_md to name a few. Extend this support to bpf_sysctl by using bpf_ctx_range instead of offsetof when matching field offsets. Fixes: 7b146cebe30c ("bpf: Sysctl hook") Fixes: e1550bfe0de4 ("bpf: Add file_pos field to bpf_sysctl ctx") Fixes: 9a1027e52535 ("selftests/bpf: Test file_pos field in bpf_sysctl ctx") Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov Acked-by: Andrey Ignatov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20191028122902.9763-1-iii@linux.ibm.com --- kernel/bpf/cgroup.c | 4 ++-- tools/testing/selftests/bpf/test_sysctl.c | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ddd8addcdb5c..a3eaf08e7dd3 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1311,12 +1311,12 @@ static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type, return false; switch (off) { - case offsetof(struct bpf_sysctl, write): + case bpf_ctx_range(struct bpf_sysctl, write): if (type != BPF_READ) return false; bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); - case offsetof(struct bpf_sysctl, file_pos): + case bpf_ctx_range(struct bpf_sysctl, file_pos): if (type == BPF_READ) { bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index a320e3844b17..7c6e5b173f33 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -161,9 +161,14 @@ static struct sysctl_test tests[] = { .descr = "ctx:file_pos sysctl:read read ok narrow", .insns = { /* If (file_pos == X) */ +#if __BYTE_ORDER == __LITTLE_ENDIAN 
BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, offsetof(struct bpf_sysctl, file_pos)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2), +#else + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, file_pos) + 3), +#endif + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 4, 2), /* return ALLOW; */ BPF_MOV64_IMM(BPF_REG_0, 1), @@ -176,6 +181,7 @@ static struct sysctl_test tests[] = { .attach_type = BPF_CGROUP_SYSCTL, .sysctl = "kernel/ostype", .open_flags = O_RDONLY, + .seek = 4, .result = SUCCESS, }, { -- cgit v1.2.3 From 7170a977743b72cf3eb46ef6ef89885dc7ad3621 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Oct 2019 13:00:04 -0700 Subject: net: annotate accesses to sk->sk_incoming_cpu This socket field can be read and written by concurrent cpus. Use READ_ONCE() and WRITE_ONCE() annotations to document this, and avoid some compiler 'optimizations'. KCSAN reported : BUG: KCSAN: data-race in tcp_v4_rcv / tcp_v4_rcv write to 0xffff88812220763c of 4 bytes by interrupt on cpu 0: sk_incoming_cpu_update include/net/sock.h:953 [inline] tcp_v4_rcv+0x1b3c/0x1bb0 net/ipv4/tcp_ipv4.c:1934 ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 process_backlog+0x1d3/0x420 net/core/dev.c:5955 napi_poll net/core/dev.c:6392 [inline] net_rx_action+0x3ae/0xa90 net/core/dev.c:6460 __do_softirq+0x115/0x33f kernel/softirq.c:292 do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1082 do_softirq.part.0+0x6b/0x80 kernel/softirq.c:337 do_softirq 
kernel/softirq.c:329 [inline] __local_bh_enable_ip+0x76/0x80 kernel/softirq.c:189 read to 0xffff88812220763c of 4 bytes by interrupt on cpu 1: sk_incoming_cpu_update include/net/sock.h:952 [inline] tcp_v4_rcv+0x181a/0x1bb0 net/ipv4/tcp_ipv4.c:1934 ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 process_backlog+0x1d3/0x420 net/core/dev.c:5955 napi_poll net/core/dev.c:6392 [inline] net_rx_action+0x3ae/0xa90 net/core/dev.c:6460 __do_softirq+0x115/0x33f kernel/softirq.c:292 run_ksoftirqd+0x46/0x60 kernel/softirq.c:603 smpboot_thread_fn+0x37d/0x4a0 kernel/smpboot.c:165 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. 
Miller --- include/net/sock.h | 4 ++-- net/core/sock.c | 4 ++-- net/ipv4/inet_hashtables.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/inet6_hashtables.c | 2 +- net/ipv6/udp.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index c31a9ed86d5a..8f9adcfac41b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -954,8 +954,8 @@ static inline void sk_incoming_cpu_update(struct sock *sk) { int cpu = raw_smp_processor_id(); - if (unlikely(sk->sk_incoming_cpu != cpu)) - sk->sk_incoming_cpu = cpu; + if (unlikely(READ_ONCE(sk->sk_incoming_cpu) != cpu)) + WRITE_ONCE(sk->sk_incoming_cpu, cpu); } static inline void sock_rps_record_flow_hash(__u32 hash) diff --git a/net/core/sock.c b/net/core/sock.c index b8e758bcb6ad..ac78a570e43a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1127,7 +1127,7 @@ set_rcvbuf: break; } case SO_INCOMING_CPU: - sk->sk_incoming_cpu = val; + WRITE_ONCE(sk->sk_incoming_cpu, val); break; case SO_CNX_ADVICE: @@ -1476,7 +1476,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_INCOMING_CPU: - v.val = sk->sk_incoming_cpu; + v.val = READ_ONCE(sk->sk_incoming_cpu); break; case SO_MEMINFO: diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 97824864e40d..83fb00153018 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -240,7 +240,7 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score = sk->sk_family == PF_INET ? 
2 : 1; - if (sk->sk_incoming_cpu == raw_smp_processor_id()) + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } return score; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d1ed160af202..1d58ce829dca 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -388,7 +388,7 @@ static int compute_score(struct sock *sk, struct net *net, return -1; score += 4; - if (sk->sk_incoming_cpu == raw_smp_processor_id()) + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; return score; } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index cf60fae9533b..fbe9d4295eac 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -105,7 +105,7 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score = 1; - if (sk->sk_incoming_cpu == raw_smp_processor_id()) + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } return score; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6324d3a8cb53..9fec580c968e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -135,7 +135,7 @@ static int compute_score(struct sock *sk, struct net *net, return -1; score++; - if (sk->sk_incoming_cpu == raw_smp_processor_id()) + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; return score; -- cgit v1.2.3 From 3d252454edd0fe88c8250cb7f9dfb9ec12d208d7 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 30 Oct 2019 09:17:18 +0100 Subject: parisc: fix frame pointer in ftrace_regs_caller() The current code in ftrace_regs_caller() doesn't assign %r3 to contain the address of the current frame. This is hidden if the kernel is compiled with FRAME_POINTER, but without it just crashes because it tries to dereference an arbitrary address. Fix this by always setting %r3 to the current stack frame. 
Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 1d1d748c227f..b96d74496977 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -2125,7 +2125,7 @@ ftrace_regs_caller: copy %rp, %r26 LDREG -FTRACE_FRAME_SIZE-PT_SZ_ALGN(%sp), %r25 ldo -8(%r25), %r25 - copy %r3, %arg2 + ldo -FTRACE_FRAME_SIZE(%r1), %arg2 b,l ftrace_function_trampoline, %rp copy %r1, %arg3 /* struct pt_regs */ -- cgit v1.2.3 From 6873e0bd6a9cb14ecfadd89d9ed9698ff1761902 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 30 Oct 2019 13:53:09 -0600 Subject: io_uring: ensure we clear io_kiocb->result before each issue We use io_kiocb->result == -EAGAIN as a way to know if we need to re-submit a polled request, as -EAGAIN reporting happens out-of-line for IO submission failures. This field is cleared when we originally allocate the request, but it isn't reset when we retry the submission from async context. This can cause issues where we think something needs a re-issue, but we're really just reading stale data. Reset ->result whenever we re-prep a request for polled submission. 
Cc: stable@vger.kernel.org Fixes: 9e645e1105ca ("io_uring: add support for sqe links") Reported-by: Bijan Mottahedeh Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index c11c4157a4c2..f9a38998f2fc 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1124,6 +1124,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, kiocb->ki_flags |= IOCB_HIPRI; kiocb->ki_complete = io_complete_rw_iopoll; + req->result = 0; } else { if (kiocb->ki_flags & IOCB_HIPRI) return -EINVAL; -- cgit v1.2.3 From a39331867335d4a94b6165e306265c9e24aca073 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Oct 2019 22:42:57 +0100 Subject: ALSA: timer: Fix mutex deadlock at releasing card When a card is disconnected while in use, the system waits until all opened files are closed then releases the card. This is done via put_device() of the card device in each device release code. The recently reported mutex deadlock bug happens in this code path; snd_timer_close() for the timer device deals with the global register_mutex and it calls put_device() there. When this timer device is the last one, the card gets freed and it eventually calls snd_timer_free(), which has again the protection with the global register_mutex -- boom. Basically put_device() call itself is race-free, so a relatively simple workaround is to move this put_device() call out of the mutex. For achieving that, in this patch, snd_timer_close_locked() got a new argument to store the card device pointer in return, and each caller invokes put_device() with the returned object after the mutex unlock. Reported-and-tested-by: Kirill A.
Shutemov Cc: Signed-off-by: Takashi Iwai --- sound/core/timer.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 5c9fbf3f4340..6b724d2ee2de 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -226,7 +226,8 @@ static int snd_timer_check_master(struct snd_timer_instance *master) return 0; } -static int snd_timer_close_locked(struct snd_timer_instance *timeri); +static int snd_timer_close_locked(struct snd_timer_instance *timeri, + struct device **card_devp_to_put); /* * open a timer instance @@ -238,6 +239,7 @@ int snd_timer_open(struct snd_timer_instance **ti, { struct snd_timer *timer; struct snd_timer_instance *timeri = NULL; + struct device *card_dev_to_put = NULL; int err; mutex_lock(&register_mutex); @@ -261,7 +263,7 @@ int snd_timer_open(struct snd_timer_instance **ti, list_add_tail(&timeri->open_list, &snd_timer_slave_list); err = snd_timer_check_slave(timeri); if (err < 0) { - snd_timer_close_locked(timeri); + snd_timer_close_locked(timeri, &card_dev_to_put); timeri = NULL; } goto unlock; @@ -313,7 +315,7 @@ int snd_timer_open(struct snd_timer_instance **ti, timeri = NULL; if (timer->card) - put_device(&timer->card->card_dev); + card_dev_to_put = &timer->card->card_dev; module_put(timer->module); goto unlock; } @@ -323,12 +325,15 @@ int snd_timer_open(struct snd_timer_instance **ti, timer->num_instances++; err = snd_timer_check_master(timeri); if (err < 0) { - snd_timer_close_locked(timeri); + snd_timer_close_locked(timeri, &card_dev_to_put); timeri = NULL; } unlock: mutex_unlock(&register_mutex); + /* put_device() is called after unlock for avoiding deadlock */ + if (card_dev_to_put) + put_device(card_dev_to_put); *ti = timeri; return err; } @@ -338,7 +343,8 @@ EXPORT_SYMBOL(snd_timer_open); * close a timer instance * call this with register_mutex down.
*/ -static int snd_timer_close_locked(struct snd_timer_instance *timeri) +static int snd_timer_close_locked(struct snd_timer_instance *timeri, + struct device **card_devp_to_put) { struct snd_timer *timer = timeri->timer; struct snd_timer_instance *slave, *tmp; @@ -395,7 +401,7 @@ static int snd_timer_close_locked(struct snd_timer_instance *timeri) timer->hw.close(timer); /* release a card refcount for safe disconnection */ if (timer->card) - put_device(&timer->card->card_dev); + *card_devp_to_put = &timer->card->card_dev; module_put(timer->module); } @@ -407,14 +413,18 @@ static int snd_timer_close_locked(struct snd_timer_instance *timeri) */ int snd_timer_close(struct snd_timer_instance *timeri) { + struct device *card_dev_to_put = NULL; int err; if (snd_BUG_ON(!timeri)) return -ENXIO; mutex_lock(&register_mutex); - err = snd_timer_close_locked(timeri); + err = snd_timer_close_locked(timeri, &card_dev_to_put); mutex_unlock(&register_mutex); + /* put_device() is called after unlock for avoiding deadlock */ + if (card_dev_to_put) + put_device(card_dev_to_put); return err; } EXPORT_SYMBOL(snd_timer_close); -- cgit v1.2.3 From ee8d153d46a3b98c064ee15c0c0a3bbf1450e5a1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Oct 2019 10:54:44 -0700 Subject: net: annotate lockless accesses to sk->sk_napi_id We already annotated most accesses to sk->sk_napi_id We missed sk_mark_napi_id() and sk_mark_napi_id_once() which might be called without socket lock held in UDP stack.
KCSAN reported : BUG: KCSAN: data-race in udpv6_queue_rcv_one_skb / udpv6_queue_rcv_one_skb write to 0xffff888121c6d108 of 4 bytes by interrupt on cpu 0: sk_mark_napi_id include/net/busy_poll.h:125 [inline] __udpv6_queue_rcv_skb net/ipv6/udp.c:571 [inline] udpv6_queue_rcv_one_skb+0x70c/0xb40 net/ipv6/udp.c:672 udpv6_queue_rcv_skb+0xb5/0x400 net/ipv6/udp.c:689 udp6_unicast_rcv_skb.isra.0+0xd7/0x180 net/ipv6/udp.c:832 __udp6_lib_rcv+0x69c/0x1770 net/ipv6/udp.c:913 udpv6_rcv+0x2b/0x40 net/ipv6/udp.c:1015 ip6_protocol_deliver_rcu+0x22a/0xbe0 net/ipv6/ip6_input.c:409 ip6_input_finish+0x30/0x50 net/ipv6/ip6_input.c:450 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip6_input+0x177/0x190 net/ipv6/ip6_input.c:459 dst_input include/net/dst.h:442 [inline] ip6_rcv_finish+0x110/0x140 net/ipv6/ip6_input.c:76 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ipv6_rcv+0x1a1/0x1b0 net/ipv6/ip6_input.c:284 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 process_backlog+0x1d3/0x420 net/core/dev.c:5955 napi_poll net/core/dev.c:6392 [inline] net_rx_action+0x3ae/0xa90 net/core/dev.c:6460 write to 0xffff888121c6d108 of 4 bytes by interrupt on cpu 1: sk_mark_napi_id include/net/busy_poll.h:125 [inline] __udpv6_queue_rcv_skb net/ipv6/udp.c:571 [inline] udpv6_queue_rcv_one_skb+0x70c/0xb40 net/ipv6/udp.c:672 udpv6_queue_rcv_skb+0xb5/0x400 net/ipv6/udp.c:689 udp6_unicast_rcv_skb.isra.0+0xd7/0x180 net/ipv6/udp.c:832 __udp6_lib_rcv+0x69c/0x1770 net/ipv6/udp.c:913 udpv6_rcv+0x2b/0x40 net/ipv6/udp.c:1015 ip6_protocol_deliver_rcu+0x22a/0xbe0 net/ipv6/ip6_input.c:409 ip6_input_finish+0x30/0x50 net/ipv6/ip6_input.c:450 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip6_input+0x177/0x190 net/ipv6/ip6_input.c:459 dst_input include/net/dst.h:442 [inline] ip6_rcv_finish+0x110/0x140 net/ipv6/ip6_input.c:76 
NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ipv6_rcv+0x1a1/0x1b0 net/ipv6/ip6_input.c:284 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 process_backlog+0x1d3/0x420 net/core/dev.c:5955 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 10890 Comm: syz-executor.0 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: e68b6e50fa35 ("udp: enable busy polling for all sockets") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- include/net/busy_poll.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 127a5c4e3699..86e028388bad 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -122,7 +122,7 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL - sk->sk_napi_id = skb->napi_id; + WRITE_ONCE(sk->sk_napi_id, skb->napi_id); #endif sk_rx_queue_set(sk, skb); } @@ -132,8 +132,8 @@ static inline void sk_mark_napi_id_once(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL - if (!sk->sk_napi_id) - sk->sk_napi_id = skb->napi_id; + if (!READ_ONCE(sk->sk_napi_id)) + WRITE_ONCE(sk->sk_napi_id, skb->napi_id); #endif } -- cgit v1.2.3 From fc89cc358fb64e2429aeae0f37906126636507ec Mon Sep 17 00:00:00 2001 From: Vishal Kulkarni Date: Wed, 30 Oct 2019 20:17:57 +0530 Subject: cxgb4: fix panic when attaching to ULD fail Release resources when attaching to ULD fail. Otherwise, data mismatch is seen between LLD and ULD later on, which lead to kernel panic when accessing resources that should not even exist in the first place. 
Fixes: 94cdb8bb993a ("cxgb4: Add support for dynamic allocation of resources for ULD") Signed-off-by: Shahjada Abul Husain Signed-off-by: Vishal Kulkarni Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 28 +++++++++++++++----------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index a4dead4ab0ed..86b528d8364c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -695,10 +695,10 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld) lld->write_cmpl_support = adap->params.write_cmpl_support; } -static void uld_attach(struct adapter *adap, unsigned int uld) +static int uld_attach(struct adapter *adap, unsigned int uld) { - void *handle; struct cxgb4_lld_info lli; + void *handle; uld_init(adap, &lli); uld_queue_init(adap, uld, &lli); @@ -708,7 +708,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) dev_warn(adap->pdev_dev, "could not attach to the %s driver, error %ld\n", adap->uld[uld].name, PTR_ERR(handle)); - return; + return PTR_ERR(handle); } adap->uld[uld].handle = handle; @@ -716,22 +716,22 @@ static void uld_attach(struct adapter *adap, unsigned int uld) if (adap->flags & CXGB4_FULL_INIT_DONE) adap->uld[uld].state_change(handle, CXGB4_STATE_UP); + + return 0; } -/** - * cxgb4_register_uld - register an upper-layer driver - * @type: the ULD type - * @p: the ULD methods +/* cxgb4_register_uld - register an upper-layer driver + * @type: the ULD type + * @p: the ULD methods * - * Registers an upper-layer driver with this driver and notifies the ULD - * about any presently available devices that support its type. Returns - * %-EBUSY if a ULD of the same type is already registered. + * Registers an upper-layer driver with this driver and notifies the ULD + * about any presently available devices that support its type. 
*/ void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p) { - int ret = 0; struct adapter *adap; + int ret = 0; if (type >= CXGB4_ULD_MAX) return; @@ -763,8 +763,12 @@ void cxgb4_register_uld(enum cxgb4_uld type, if (ret) goto free_irq; adap->uld[type] = *p; - uld_attach(adap, type); + ret = uld_attach(adap, type); + if (ret) + goto free_txq; continue; +free_txq: + release_sge_txq_uld(adap, type); free_irq: if (adap->flags & CXGB4_FULL_INIT_DONE) quiesce_rx_uld(adap, type); -- cgit v1.2.3 From c4509a5ac0ace94d5b1f0092dc4d36933c1d896e Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 30 Oct 2019 15:32:11 +0000 Subject: hv_netvsc: Fix error handling in netvsc_set_features() When an error is returned by rndis_filter_set_offload_params(), we should still assign the unaffected features to ndev->features. Otherwise, these features will be missing. Fixes: d6792a5a0747 ("hv_netvsc: Add handler for LRO setting change") Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index fd4fff57fd6e..bab7c1f84dfd 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1807,8 +1807,10 @@ static int netvsc_set_features(struct net_device *ndev, ret = rndis_filter_set_offload_params(ndev, nvdev, &offloads); - if (ret) + if (ret) { features ^= NETIF_F_LRO; + ndev->features = features; + } syncvf: if (!vf_netdev) -- cgit v1.2.3 From 719b85c336ed35565d0f3982269d6f684087bb00 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 30 Oct 2019 15:32:13 +0000 Subject: hv_netvsc: Fix error handling in netvsc_attach() If rndis_filter_open() fails, we need to remove the rndis device created in earlier steps, before returning an error code. Otherwise, the retry of netvsc_attach() from its callers will fail and hang. 
Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic") Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index bab7c1f84dfd..963509add611 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -982,7 +982,7 @@ static int netvsc_attach(struct net_device *ndev, if (netif_running(ndev)) { ret = rndis_filter_open(nvdev); if (ret) - return ret; + goto err; rdev = nvdev->extension; if (!rdev->link_state) @@ -990,6 +990,13 @@ static int netvsc_attach(struct net_device *ndev, } return 0; + +err: + netif_device_detach(ndev); + + rndis_filter_device_remove(hdev, nvdev); + + return ret; } static int netvsc_set_channels(struct net_device *net, -- cgit v1.2.3 From 0b6b30c65621fc11a799ca71241f52d8fd9e334c Mon Sep 17 00:00:00 2001 From: Narendra K Date: Tue, 29 Oct 2019 18:37:50 +0100 Subject: efi: Make CONFIG_EFI_RCI2_TABLE selectable on x86 only For the EFI_RCI2_TABLE Kconfig option, 'make oldconfig' asks the user for input on platforms where the option may not be applicable. This patch modifies the Kconfig option to ask the user for input only when CONFIG_X86 or CONFIG_COMPILE_TEST is set to y. 
Suggested-by: Geert Uytterhoeven Reported-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Signed-off-by: Narendra K Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191029173755.27149-2-ardb@kernel.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 178ee8106828..b248870a9806 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -182,6 +182,7 @@ config RESET_ATTACK_MITIGATION config EFI_RCI2_TABLE bool "EFI Runtime Configuration Interface Table Version 2 Support" + depends on X86 || COMPILE_TEST help Displays the content of the Runtime Configuration Interface Table version 2 on Dell EMC PowerEdge systems as a binary -- cgit v1.2.3 From 2bb6a81633cb47dcba4c9f75605cbe49e6b73d60 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Tue, 29 Oct 2019 18:37:51 +0100 Subject: efi/tpm: Return -EINVAL when determining tpm final events log size fails Currently nothing checks the return value of efi_tpm_eventlog_init(), but in case that changes in the future make sure an error is returned when it fails to determine the tpm final events log size. 
Suggested-by: Dan Carpenter Signed-off-by: Jerry Snitselaar Signed-off-by: Ard Biesheuvel Reviewed-by: Jarkko Sakkinen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: e658c82be556 ("efi/tpm: Only set 'efi_tpm_final_log_size' after ...") Link: https://lkml.kernel.org/r/20191029173755.27149-3-ardb@kernel.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/tpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c index ebd7977653a8..31f9f0e369b9 100644 --- a/drivers/firmware/efi/tpm.c +++ b/drivers/firmware/efi/tpm.c @@ -88,6 +88,7 @@ int __init efi_tpm_eventlog_init(void) if (tbl_size < 0) { pr_err(FW_BUG "Failed to parse event in TPM Final Events Log\n"); + ret = -EINVAL; goto out_calc; } -- cgit v1.2.3 From 18b915ac6b0ac5ba7ded03156860f60a9f16df2b Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 29 Oct 2019 18:37:52 +0100 Subject: efi/random: Treat EFI_RNG_PROTOCOL output as bootloader randomness Commit 428826f5358c ("fdt: add support for rng-seed") introduced add_bootloader_randomness(), permitting randomness provided by the bootloader or firmware to be credited as entropy. However, the fact that the UEFI support code was already wired into the RNG subsystem via a call to add_device_randomness() was overlooked, and so it was not converted at the same time. Note that this UEFI (v2.4 or newer) feature is currently only implemented for EFI stub booting on ARM, and further note that CONFIG_RANDOM_TRUST_BOOTLOADER must be enabled, and this should be done only if there indeed is sufficient trust in the bootloader _and_ its source of randomness. 
[ ardb: update commit log ] Tested-by: Bhupesh Sharma Signed-off-by: Dominik Brodowski Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191029173755.27149-4-ardb@kernel.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 69f00f7453a3..e98bbf8e56d9 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -554,7 +554,7 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz, sizeof(*seed) + size); if (seed != NULL) { pr_notice("seeding entropy pool\n"); - add_device_randomness(seed->bits, seed->size); + add_bootloader_randomness(seed->bits, seed->size); early_memunmap(seed, sizeof(*seed) + size); } else { pr_err("Could not map UEFI random seed!\n"); -- cgit v1.2.3 From 41cd96fa149b29684ebd38759fefb07f9c7d5276 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 29 Oct 2019 18:37:53 +0100 Subject: efi: libstub/arm: Account for firmware reserved memory at the base of RAM The EFI stubloader for ARM starts out by allocating a 32 MB window at the base of RAM, in order to ensure that the decompressor (which blindly copies the uncompressed kernel into that window) does not overwrite other allocations that are made while running in the context of the EFI firmware. In some cases, (e.g., U-Boot running on the Raspberry Pi 2), this is causing boot failures because this initial allocation conflicts with a page of reserved memory at the base of RAM that contains the SMP spin tables and other pieces of firmware data and which was put there by the bootloader under the assumption that the TEXT_OFFSET window right below the kernel is only used partially during early boot, and will be left alone once the memory reservations are processed and taken into account. 
So let's permit reserved memory regions to exist in the region starting at the base of RAM, and ending at TEXT_OFFSET - 5 * PAGE_SIZE, which is the window below the kernel that is not touched by the early boot code. Tested-by: Guillaume Gardet Signed-off-by: Ard Biesheuvel Acked-by: Chester Lin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191029173755.27149-5-ardb@kernel.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/Makefile | 1 + drivers/firmware/efi/libstub/arm32-stub.c | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 0460c7581220..ee0661ddb25b 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -52,6 +52,7 @@ lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o string.o random.o \ lib-$(CONFIG_ARM) += arm32-stub.o lib-$(CONFIG_ARM64) += arm64-stub.o +CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_arm64-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) # diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c index e8f7aefb6813..ffa242ad0a82 100644 --- a/drivers/firmware/efi/libstub/arm32-stub.c +++ b/drivers/firmware/efi/libstub/arm32-stub.c @@ -195,6 +195,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table, unsigned long dram_base, efi_loaded_image_t *image) { + unsigned long kernel_base; efi_status_t status; /* @@ -204,9 +205,18 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table, * loaded. These assumptions are made by the decompressor, * before any memory map is available. 
*/ - dram_base = round_up(dram_base, SZ_128M); + kernel_base = round_up(dram_base, SZ_128M); - status = reserve_kernel_base(sys_table, dram_base, reserve_addr, + /* + * Note that some platforms (notably, the Raspberry Pi 2) put + * spin-tables and other pieces of firmware at the base of RAM, + * abusing the fact that the window of TEXT_OFFSET bytes at the + * base of the kernel image is only partially used at the moment. + * (Up to 5 pages are used for the swapper page tables) + */ + kernel_base += TEXT_OFFSET - 5 * PAGE_SIZE; + + status = reserve_kernel_base(sys_table, kernel_base, reserve_addr, reserve_size); if (status != EFI_SUCCESS) { pr_efi_err(sys_table, "Unable to allocate memory for uncompressed kernel.\n"); @@ -220,7 +230,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table, *image_size = image->image_size; status = efi_relocate_kernel(sys_table, image_addr, *image_size, *image_size, - dram_base + MAX_UNCOMP_KERNEL_SIZE, 0); + kernel_base + MAX_UNCOMP_KERNEL_SIZE, 0); if (status != EFI_SUCCESS) { pr_efi_err(sys_table, "Failed to relocate kernel.\n"); efi_free(sys_table, *reserve_size, *reserve_addr); -- cgit v1.2.3 From 220dd7699c46d5940115bd797b01b2ab047c87b8 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 29 Oct 2019 18:37:54 +0100 Subject: x86, efi: Never relocate kernel below lowest acceptable address Currently, kernel fails to boot on some HyperV VMs when using EFI. And it's a potential issue on all x86 platforms. It's caused by broken kernel relocation on EFI systems, when below three conditions are met: 1. Kernel image is not loaded to the default address (LOAD_PHYSICAL_ADDR) by the loader. 2. There isn't enough room to contain the kernel, starting from the default load address (eg. something else occupied part the region). 3. In the memmap provided by EFI firmware, there is a memory region starts below LOAD_PHYSICAL_ADDR, and suitable for containing the kernel. EFI stub will perform a kernel relocation when condition 1 is met. 
But due to condition 2, EFI stub can't relocate kernel to the preferred address, so it fallback to ask EFI firmware to alloc lowest usable memory region, got the low region mentioned in condition 3, and relocated kernel there. It's incorrect to relocate the kernel below LOAD_PHYSICAL_ADDR. This is the lowest acceptable kernel relocation address. The first thing goes wrong is in arch/x86/boot/compressed/head_64.S. Kernel decompression will force use LOAD_PHYSICAL_ADDR as the output address if kernel is located below it. Then the relocation before decompression, which move kernel to the end of the decompression buffer, will overwrite other memory region, as there is no enough memory there. To fix it, just don't let EFI stub relocate the kernel to any address lower than lowest acceptable address. [ ardb: introduce efi_low_alloc_above() to reduce the scope of the change ] Signed-off-by: Kairui Song Signed-off-by: Ard Biesheuvel Acked-by: Jarkko Sakkinen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191029173755.27149-6-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 4 +++- drivers/firmware/efi/libstub/arm32-stub.c | 2 +- drivers/firmware/efi/libstub/efi-stub-helper.c | 24 ++++++++++-------------- include/linux/efi.h | 18 ++++++++++++++++-- 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index d6662fdef300..82bc60c8acb2 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "../string.h" #include "eboot.h" @@ -813,7 +814,8 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) status = efi_relocate_kernel(sys_table, &bzimage_addr, hdr->init_size, hdr->init_size, hdr->pref_address, - hdr->kernel_alignment); + hdr->kernel_alignment, + LOAD_PHYSICAL_ADDR); if (status != 
EFI_SUCCESS) { efi_printk(sys_table, "efi_relocate_kernel() failed!\n"); goto fail; diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c index ffa242ad0a82..41213bf5fcf5 100644 --- a/drivers/firmware/efi/libstub/arm32-stub.c +++ b/drivers/firmware/efi/libstub/arm32-stub.c @@ -230,7 +230,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table, *image_size = image->image_size; status = efi_relocate_kernel(sys_table, image_addr, *image_size, *image_size, - kernel_base + MAX_UNCOMP_KERNEL_SIZE, 0); + kernel_base + MAX_UNCOMP_KERNEL_SIZE, 0, 0); if (status != EFI_SUCCESS) { pr_efi_err(sys_table, "Failed to relocate kernel.\n"); efi_free(sys_table, *reserve_size, *reserve_addr); diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 3caae7f2cf56..35dbc2791c97 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -260,11 +260,11 @@ fail: } /* - * Allocate at the lowest possible address. + * Allocate at the lowest possible address that is not below 'min'. */ -efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, - unsigned long size, unsigned long align, - unsigned long *addr) +efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg, + unsigned long size, unsigned long align, + unsigned long *addr, unsigned long min) { unsigned long map_size, desc_size, buff_size; efi_memory_desc_t *map; @@ -311,13 +311,8 @@ efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, start = desc->phys_addr; end = start + desc->num_pages * EFI_PAGE_SIZE; - /* - * Don't allocate at 0x0. It will confuse code that - * checks pointers against NULL. Skip the first 8 - * bytes so we start at a nice even number. 
- */ - if (start == 0x0) - start += 8; + if (start < min) + start = min; start = round_up(start, align); if ((start + size) > end) @@ -698,7 +693,8 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, unsigned long image_size, unsigned long alloc_size, unsigned long preferred_addr, - unsigned long alignment) + unsigned long alignment, + unsigned long min_addr) { unsigned long cur_image_addr; unsigned long new_addr = 0; @@ -731,8 +727,8 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, * possible. */ if (status != EFI_SUCCESS) { - status = efi_low_alloc(sys_table_arg, alloc_size, alignment, - &new_addr); + status = efi_low_alloc_above(sys_table_arg, alloc_size, + alignment, &new_addr, min_addr); } if (status != EFI_SUCCESS) { pr_efi_err(sys_table_arg, "Failed to allocate usable memory for kernel.\n"); diff --git a/include/linux/efi.h b/include/linux/efi.h index bd3837022307..d87acf62958e 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1579,9 +1579,22 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, struct efi_boot_memmap *map); +efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg, + unsigned long size, unsigned long align, + unsigned long *addr, unsigned long min); + +static inline efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, unsigned long size, unsigned long align, - unsigned long *addr); + unsigned long *addr) +{ + /* + * Don't allocate at 0x0. It will confuse code that + * checks pointers against NULL. Skip the first 8 + * bytes so we start at a nice even number. 
+ */ + return efi_low_alloc_above(sys_table_arg, size, align, addr, 0x8); +} efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, unsigned long size, unsigned long align, @@ -1592,7 +1605,8 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, unsigned long image_size, unsigned long alloc_size, unsigned long preferred_addr, - unsigned long alignment); + unsigned long alignment, + unsigned long min_addr); efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, efi_loaded_image_t *image, -- cgit v1.2.3 From 359efcc2c910117d2faf704ce154e91fc976d37f Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 29 Oct 2019 18:37:55 +0100 Subject: efi/efi_test: Lock down /dev/efi_test and require CAP_SYS_ADMIN The driver exposes EFI runtime services to user-space through an IOCTL interface, calling the EFI services function pointers directly without using the efivar API. Disallow access to the /dev/efi_test character device when the kernel is locked down to prevent arbitrary user-space to call EFI runtime services. Also require CAP_SYS_ADMIN to open the chardev to prevent unprivileged users to call the EFI runtime services, instead of just relying on the chardev file mode bits for this. The main user of this driver is the fwts [0] tool that already checks if the effective user ID is 0 and fails otherwise. So this change shouldn't cause any regression to this tool. 
[0]: https://wiki.ubuntu.com/FirmwareTestSuite/Reference/uefivarinfo Signed-off-by: Javier Martinez Canillas Signed-off-by: Ard Biesheuvel Acked-by: Laszlo Ersek Acked-by: Matthew Garrett Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191029173755.27149-7-ardb@kernel.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/test/efi_test.c | 8 ++++++++ include/linux/security.h | 1 + security/lockdown/lockdown.c | 1 + 3 files changed, 10 insertions(+) diff --git a/drivers/firmware/efi/test/efi_test.c b/drivers/firmware/efi/test/efi_test.c index 877745c3aaf2..7baf48c01e72 100644 --- a/drivers/firmware/efi/test/efi_test.c +++ b/drivers/firmware/efi/test/efi_test.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -717,6 +718,13 @@ static long efi_test_ioctl(struct file *file, unsigned int cmd, static int efi_test_open(struct inode *inode, struct file *file) { + int ret = security_locked_down(LOCKDOWN_EFI_TEST); + + if (ret) + return ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; /* * nothing special to do here * We do accept multiple open files at the same time as we diff --git a/include/linux/security.h b/include/linux/security.h index a8d59d612d27..9df7547afc0c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -105,6 +105,7 @@ enum lockdown_reason { LOCKDOWN_NONE, LOCKDOWN_MODULE_SIGNATURE, LOCKDOWN_DEV_MEM, + LOCKDOWN_EFI_TEST, LOCKDOWN_KEXEC, LOCKDOWN_HIBERNATION, LOCKDOWN_PCI_ACCESS, diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c index 8a10b43daf74..40b790536def 100644 --- a/security/lockdown/lockdown.c +++ b/security/lockdown/lockdown.c @@ -20,6 +20,7 @@ static const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = { [LOCKDOWN_NONE] = "none", [LOCKDOWN_MODULE_SIGNATURE] = "unsigned module loading", [LOCKDOWN_DEV_MEM] = "/dev/mem,kmem,port", + [LOCKDOWN_EFI_TEST] = "/dev/efi_test access", 
[LOCKDOWN_KEXEC] = "kexec of unsigned images", [LOCKDOWN_HIBERNATION] = "hibernation", [LOCKDOWN_PCI_ACCESS] = "direct PCI access", -- cgit v1.2.3 From a97b0e773e492ae319a7e981e98962a1060215f9 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Fri, 25 Oct 2019 13:34:58 +0200 Subject: kvm: call kvm_arch_destroy_vm if vm creation fails In kvm_create_vm(), if we've successfully called kvm_arch_init_vm(), but then fail later in the function, we need to call kvm_arch_destroy_vm() so that it can do any necessary cleanup (like freeing memory). Fixes: 44a95dae1d229a ("KVM: x86: Detect and Initialize AVIC support") Signed-off-by: John Sperbeck Signed-off-by: Jim Mattson Reviewed-by: Junaid Shahid [Remove dependency on "kvm: Don't clear reference count on kvm_create_vm() error path" which was not committed. - Paolo] Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ec14dae2f538..d6f0696d98ef 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -641,7 +641,6 @@ static struct kvm *kvm_create_vm(unsigned long type) mutex_init(&kvm->lock); mutex_init(&kvm->irq_lock); mutex_init(&kvm->slots_lock); - refcount_set(&kvm->users_count, 1); INIT_LIST_HEAD(&kvm->devices); BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); @@ -650,7 +649,7 @@ static struct kvm *kvm_create_vm(unsigned long type) struct kvm_memslots *slots = kvm_alloc_memslots(); if (!slots) - goto out_err_no_disable; + goto out_err_no_arch_destroy_vm; /* Generations must be different for each address space. 
*/ slots->generation = i; rcu_assign_pointer(kvm->memslots[i], slots); @@ -660,12 +659,13 @@ static struct kvm *kvm_create_vm(unsigned long type) rcu_assign_pointer(kvm->buses[i], kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT)); if (!kvm->buses[i]) - goto out_err_no_disable; + goto out_err_no_arch_destroy_vm; } + refcount_set(&kvm->users_count, 1); r = kvm_arch_init_vm(kvm, type); if (r) - goto out_err_no_disable; + goto out_err_no_arch_destroy_vm; r = hardware_enable_all(); if (r) @@ -699,7 +699,9 @@ out_err_no_irq_srcu: out_err_no_srcu: hardware_disable_all(); out_err_no_disable: - refcount_set(&kvm->users_count, 0); + kvm_arch_destroy_vm(kvm); + WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); +out_err_no_arch_destroy_vm: for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm_get_bus(kvm, i)); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) -- cgit v1.2.3 From 9167ab79936206118cc60e47dcb926c3489f3bd5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 27 Oct 2019 16:23:23 +0100 Subject: KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active VMX already does so if the host has SMEP, in order to support the combination of CR0.WP=1 and CR4.SMEP=1. However, it is perfectly safe to always do so, and in fact VMX already ends up running with EFER.NXE=1 on old processors that lack the "load EFER" controls, because it may help avoiding a slow MSR write. Removing all the conditionals simplifies the code. SVM does not have similar code, but it should since recent AMD processors do support SMEP. So this patch also makes the code for the two vendors more similar while fixing NPT=0, CR0.WP=1 and CR4.SMEP=1 on AMD processors. 
Cc: stable@vger.kernel.org Cc: Joerg Roedel Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 10 ++++++++-- arch/x86/kvm/vmx/vmx.c | 14 +++----------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ca200b50cde4..c5673bda4b66 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -734,8 +734,14 @@ static int get_npt_level(struct kvm_vcpu *vcpu) static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) { vcpu->arch.efer = efer; - if (!npt_enabled && !(efer & EFER_LMA)) - efer &= ~EFER_LME; + + if (!npt_enabled) { + /* Shadow paging assumes NX to be available. */ + efer |= EFER_NX; + + if (!(efer & EFER_LMA)) + efer &= ~EFER_LME; + } to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 8f01019295a1..5d21a4ab28cf 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -969,17 +969,9 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) u64 guest_efer = vmx->vcpu.arch.efer; u64 ignore_bits = 0; - if (!enable_ept) { - /* - * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing - * host CPUID is more efficient than testing guest CPUID - * or CR4. Host SMEP is anyway a requirement for guest SMEP. - */ - if (boot_cpu_has(X86_FEATURE_SMEP)) - guest_efer |= EFER_NX; - else if (!(guest_efer & EFER_NX)) - ignore_bits |= EFER_NX; - } + /* Shadow paging assumes NX to be available. */ + if (!enable_ept) + guest_efer |= EFER_NX; /* * LMA and LME handled by hardware; SCE meaningless outside long mode. -- cgit v1.2.3 From 36c602dcdd872e9f9b91aae5266b6d7d72b69b96 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 29 Oct 2019 16:27:38 -0700 Subject: arm64: cpufeature: Enable Qualcomm Falkor errata 1009 for Kryo The Kryo cores share errata 1009 with Falkor, so add their model definitions and enable it for them as well. 
Signed-off-by: Bjorn Andersson [will: Update entry in silicon-errata.rst] Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 2 +- arch/arm64/kernel/cpu_errata.c | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index ab7ed2fd072f..25d62272de73 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -126,7 +126,7 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | +----------------+-----------------+-----------------+-----------------------------+ -| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | +| Qualcomm Tech. | Kryo/Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | +----------------+-----------------+-----------------+-----------------------------+ | Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 7f9b699969c7..091e3ec0f420 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -659,17 +659,23 @@ static const struct midr_range arm64_harden_el2_vectors[] = { #endif #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI - -static const struct midr_range arm64_repeat_tlbi_cpus[] = { +static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 - MIDR_RANGE(MIDR_QCOM_FALKOR_V1, 0, 0, 0, 0), + { + ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0) + }, + { + .midr_range.model = MIDR_QCOM_KRYO, + .matches = is_kryo_midr, + }, #endif #ifdef CONFIG_ARM64_ERRATUM_1286807 - MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), + { + ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), + }, #endif {}, }; - #endif #ifdef 
CONFIG_CAVIUM_ERRATUM_27456 @@ -825,7 +831,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Qualcomm erratum 1009, ARM erratum 1286807", .capability = ARM64_WORKAROUND_REPEAT_TLBI, - ERRATA_MIDR_RANGE_LIST(arm64_repeat_tlbi_cpus), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = cpucap_multi_entry_cap_matches, + .match_list = arm64_repeat_tlbi_list, }, #endif #ifdef CONFIG_ARM64_ERRATUM_858921 -- cgit v1.2.3 From b8e51a6a9db94bc1fb18ae831b3dab106b5a4b5f Mon Sep 17 00:00:00 2001 From: Yihui ZENG Date: Fri, 25 Oct 2019 12:31:48 +0300 Subject: s390/cmm: fix information leak in cmm_timeout_handler() The problem is that we were putting the NUL terminator too far: buf[sizeof(buf) - 1] = '\0'; If the user input isn't NUL terminated and they haven't initialized the whole buffer then it leads to an info leak. The NUL terminator should be: buf[len - 1] = '\0'; Signed-off-by: Yihui Zeng Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter [heiko.carstens@de.ibm.com: keep semantics of how *lenp and *ppos are handled] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/cmm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 510a18299196..a51c892f14f3 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -298,16 +298,16 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, } if (write) { - len = *lenp; - if (copy_from_user(buf, buffer, - len > sizeof(buf) ? 
sizeof(buf) : len)) + len = min(*lenp, sizeof(buf)); + if (copy_from_user(buf, buffer, len)) return -EFAULT; - buf[sizeof(buf) - 1] = '\0'; + buf[len - 1] = '\0'; cmm_skip_blanks(buf, &p); nr = simple_strtoul(p, &p, 0); cmm_skip_blanks(p, &p); seconds = simple_strtoul(p, &p, 0); cmm_set_timeout(nr, seconds); + *ppos += *lenp; } else { len = sprintf(buf, "%ld %ld\n", cmm_timeout_pages, cmm_timeout_seconds); @@ -315,9 +315,9 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, len = *lenp; if (copy_to_user(buffer, buf, len)) return -EFAULT; + *lenp = len; + *ppos += len; } - *lenp = len; - *ppos += len; return 0; } -- cgit v1.2.3 From a1d863ac3e1085e1fea9caafd87252d08731de2e Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 2 Oct 2019 13:29:57 +0200 Subject: s390/unwind: fix mixing regs and sp unwind_for_each_frame stops after the first frame if regs->gprs[15] <= sp. The reason is that in case regs are specified, the first frame should be regs->psw.addr and the second frame should be sp->gprs[8]. However, currently the second frame is regs->gprs[15], which confuses outside_of_stack(). Fix by introducing a flag to distinguish this special case from unwinding the interrupt handler, for which the current behavior is appropriate. 
Fixes: 78c98f907413 ("s390/unwind: introduce stack unwind API") Signed-off-by: Ilya Leoshkevich Cc: stable@vger.kernel.org # v5.2+ Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/unwind.h | 1 + arch/s390/kernel/unwind_bc.c | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h index d827b5b9a32c..eaaefeceef6f 100644 --- a/arch/s390/include/asm/unwind.h +++ b/arch/s390/include/asm/unwind.h @@ -35,6 +35,7 @@ struct unwind_state { struct task_struct *task; struct pt_regs *regs; unsigned long sp, ip; + bool reuse_sp; int graph_idx; bool reliable; bool error; diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index 8fc9daae47a2..a8204f952315 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -46,10 +46,15 @@ bool unwind_next_frame(struct unwind_state *state) regs = state->regs; if (unlikely(regs)) { - sp = READ_ONCE_NOCHECK(regs->gprs[15]); - if (unlikely(outside_of_stack(state, sp))) { - if (!update_stack_info(state, sp)) - goto out_err; + if (state->reuse_sp) { + sp = state->sp; + state->reuse_sp = false; + } else { + sp = READ_ONCE_NOCHECK(regs->gprs[15]); + if (unlikely(outside_of_stack(state, sp))) { + if (!update_stack_info(state, sp)) + goto out_err; + } } sf = (struct stack_frame *) sp; ip = READ_ONCE_NOCHECK(sf->gprs[8]); @@ -107,9 +112,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, { struct stack_info *info = &state->stack_info; unsigned long *mask = &state->stack_mask; + bool reliable, reuse_sp; struct stack_frame *sf; unsigned long ip; - bool reliable; memset(state, 0, sizeof(*state)); state->task = task; @@ -134,10 +139,12 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, if (regs) { ip = READ_ONCE_NOCHECK(regs->psw.addr); reliable = true; + reuse_sp = true; } else { sf = (struct stack_frame *) sp; ip = 
READ_ONCE_NOCHECK(sf->gprs[8]); reliable = false; + reuse_sp = false; } #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -151,5 +158,6 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, state->sp = sp; state->ip = ip; state->reliable = reliable; + state->reuse_sp = reuse_sp; } EXPORT_SYMBOL_GPL(__unwind_start); -- cgit v1.2.3 From 3d7efa4edd07be5c5c3ffa95ba63e97e070e1f3f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Oct 2019 11:03:27 +0100 Subject: s390/idle: fix cpu idle time calculation The idle time reported in /proc/stat sometimes incorrectly contains huge values on s390. This is caused by a bug in arch_cpu_idle_time(). The kernel tries to figure out when a different cpu entered idle by accessing its per-cpu data structure. There is an ordering problem: if the remote cpu has an idle_enter value which is not zero, and an idle_exit value which is zero, it is assumed it is idle since "now". The "now" timestamp however is taken before the idle_enter value is read. Which in turn means that "now" can be smaller than idle_enter of the remote cpu. Unconditionally subtracting idle_enter from "now" can thus lead to a negative value (aka large unsigned value). Fix this by moving the get_tod_clock() invocation out of the loop. While at it also make the code a bit more readable. A similar bug also exists for show_idle_time(). Fix this as well.
Cc: Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/idle.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index b9d8fe45737a..8f8456816d83 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -69,18 +69,26 @@ DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); static ssize_t show_idle_time(struct device *dev, struct device_attribute *attr, char *buf) { + unsigned long long now, idle_time, idle_enter, idle_exit, in_idle; struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long now, idle_time, idle_enter, idle_exit; unsigned int seq; do { - now = get_tod_clock(); seq = read_seqcount_begin(&idle->seqcount); idle_time = READ_ONCE(idle->idle_time); idle_enter = READ_ONCE(idle->clock_idle_enter); idle_exit = READ_ONCE(idle->clock_idle_exit); } while (read_seqcount_retry(&idle->seqcount, seq)); - idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; + in_idle = 0; + now = get_tod_clock(); + if (idle_enter) { + if (idle_exit) { + in_idle = idle_exit - idle_enter; + } else if (now > idle_enter) { + in_idle = now - idle_enter; + } + } + idle_time += in_idle; return sprintf(buf, "%llu\n", idle_time >> 12); } DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); @@ -88,17 +96,24 @@ DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); u64 arch_cpu_idle_time(int cpu) { struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); - unsigned long long now, idle_enter, idle_exit; + unsigned long long now, idle_enter, idle_exit, in_idle; unsigned int seq; do { - now = get_tod_clock(); seq = read_seqcount_begin(&idle->seqcount); idle_enter = READ_ONCE(idle->clock_idle_enter); idle_exit = READ_ONCE(idle->clock_idle_exit); } while (read_seqcount_retry(&idle->seqcount, seq)); - - return cputime_to_nsecs(idle_enter ? 
((idle_exit ?: now) - idle_enter) : 0); + in_idle = 0; + now = get_tod_clock(); + if (idle_enter) { + if (idle_exit) { + in_idle = idle_exit - idle_enter; + } else if (now > idle_enter) { + in_idle = now - idle_enter; + } + } + return cputime_to_nsecs(in_idle); } void arch_cpu_idle_enter(void) -- cgit v1.2.3 From 41591a51f00d2dc7bb9dc6e9bedf56c5cf6f2392 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 31 Oct 2019 13:53:41 +0300 Subject: iocost: don't nest spin_lock_irq in ioc_weight_write() This code causes a static analysis warning: block/blk-iocost.c:2113 ioc_weight_write() error: double lock 'irq' We disable IRQs in blkg_conf_prep() and re-enable them in blkg_conf_finish(). IRQ disable/enable should not be nested because that means the IRQs will be enabled at the first unlock instead of the second one. Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost") Acked-by: Tejun Heo Signed-off-by: Dan Carpenter Signed-off-by: Jens Axboe --- block/blk-iocost.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 2a3db80c1dce..a7ed434eae03 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2110,10 +2110,10 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf, goto einval; } - spin_lock_irq(&iocg->ioc->lock); + spin_lock(&iocg->ioc->lock); iocg->cfg_weight = v; weight_updated(iocg); - spin_unlock_irq(&iocg->ioc->lock); + spin_unlock(&iocg->ioc->lock); blkg_conf_finish(&ctx); return nbytes; -- cgit v1.2.3 From d8eca64eec7103ab1fbabc0a187dbf6acfb2af93 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 31 Oct 2019 11:07:13 +0200 Subject: usb: dwc3: gadget: fix race when disabling ep with cancelled xfers When disabling an endpoint which has cancelled requests, we should make sure to giveback requests that are currently pending in the cancelled list, otherwise we may fall into a situation where command completion interrupt fires after endpoint has been disabled, therefore 
causing a splat. Fixes: fec9095bdef4 "usb: dwc3: gadget: remove wait_end_transfer" Reported-by: Roger Quadros Signed-off-by: Felipe Balbi Link: https://lore.kernel.org/r/20191031090713.1452818-1-felipe.balbi@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 86dc1db788a9..a9aba716bf80 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -707,6 +707,12 @@ static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep) dwc3_gadget_giveback(dep, req, -ESHUTDOWN); } + + while (!list_empty(&dep->cancelled_list)) { + req = next_request(&dep->cancelled_list); + + dwc3_gadget_giveback(dep, req, -ESHUTDOWN); + } } /** -- cgit v1.2.3 From f9c32435ab7221d1d6cb35738fa85a2da012b23e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 31 Oct 2019 12:13:46 +0000 Subject: rxrpc: Fix handling of last subpacket of jumbo packet When rxrpc_recvmsg_data() sets the return value to 1 because it's drained all the data for the last packet, it checks the last-packet flag on the whole packet - but this is wrong, since the last-packet flag is only set on the final subpacket of the last jumbo packet. This means that a call that receives its last packet in a jumbo packet won't complete properly. Fix this by having rxrpc_locate_data() determine the last-packet state of the subpacket it's looking at and passing that back to the caller rather than having the caller look in the packet header. The caller then needs to cache this in the rxrpc_call struct as rxrpc_locate_data() isn't then called again for this packet. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Fixes: e2de6c404898 ("rxrpc: Use info in skbuff instead of reparsing a jumbo packet") Signed-off-by: David Howells Signed-off-by: David S. 
Miller --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/recvmsg.c | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ecc17dabec8f..7c7d10f2e0c1 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -601,6 +601,7 @@ struct rxrpc_call { int debug_id; /* debug ID for printks */ unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ unsigned short rx_pkt_len; /* Current recvmsg packet len */ + bool rx_pkt_last; /* Current recvmsg packet is last */ /* Rx/Tx circular buffer, depending on phase. * diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index a4090797c9b2..8578c39ec839 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -267,11 +267,13 @@ static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, */ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, u8 *_annotation, - unsigned int *_offset, unsigned int *_len) + unsigned int *_offset, unsigned int *_len, + bool *_last) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int len; + bool last = false; int ret; u8 annotation = *_annotation; u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET; @@ -281,6 +283,8 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, len = skb->len - offset; if (subpacket < sp->nr_subpackets - 1) len = RXRPC_JUMBO_DATALEN; + else if (sp->rx_flags & RXRPC_SKB_INCL_LAST) + last = true; if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) { ret = rxrpc_verify_packet(call, skb, annotation, offset, len); @@ -291,6 +295,7 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, *_offset = offset; *_len = len; + *_last = last; call->security->locate_data(call, skb, _offset, _len); return 0; } @@ -309,7 +314,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, rxrpc_serial_t serial; rxrpc_seq_t hard_ack, 
top, seq; size_t remain; - bool last; + bool rx_pkt_last; unsigned int rx_pkt_offset, rx_pkt_len; int ix, copy, ret = -EAGAIN, ret2; @@ -319,6 +324,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, rx_pkt_offset = call->rx_pkt_offset; rx_pkt_len = call->rx_pkt_len; + rx_pkt_last = call->rx_pkt_last; if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) { seq = call->rx_hard_ack; @@ -329,6 +335,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, /* Barriers against rxrpc_input_data(). */ hard_ack = call->rx_hard_ack; seq = hard_ack + 1; + while (top = smp_load_acquire(&call->rx_top), before_eq(seq, top) ) { @@ -356,7 +363,8 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (rx_pkt_offset == 0) { ret2 = rxrpc_locate_data(call, skb, &call->rxtx_annotations[ix], - &rx_pkt_offset, &rx_pkt_len); + &rx_pkt_offset, &rx_pkt_len, + &rx_pkt_last); trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq, rx_pkt_offset, rx_pkt_len, ret2); if (ret2 < 0) { @@ -396,13 +404,12 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, } /* The whole packet has been transferred. 
*/ - last = sp->hdr.flags & RXRPC_LAST_PACKET; if (!(flags & MSG_PEEK)) rxrpc_rotate_rx_window(call); rx_pkt_offset = 0; rx_pkt_len = 0; - if (last) { + if (rx_pkt_last) { ASSERTCMP(seq, ==, READ_ONCE(call->rx_top)); ret = 1; goto out; @@ -415,6 +422,7 @@ out: if (!(flags & MSG_PEEK)) { call->rx_pkt_offset = rx_pkt_offset; call->rx_pkt_len = rx_pkt_len; + call->rx_pkt_last = rx_pkt_last; } done: trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq, -- cgit v1.2.3 From 6d6f0383b697f004c65823c2b64240912f18515d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 31 Oct 2019 18:20:30 +0200 Subject: netdevsim: Fix use-after-free during device dismantle Commit da58f90f11f5 ("netdevsim: Add devlink-trap support") added delayed work to netdevsim that periodically iterates over the registered netdevsim ports and reports various packet traps via devlink. While the delayed work takes the 'port_list_lock' mutex to protect against concurrent addition / deletion of ports, during device creation / dismantle ports are added / deleted without this lock, which can result in a use-after-free [1]. Fix this by making sure that the ports list is always modified under the lock. [1] [ 59.205543] ================================================================== [ 59.207748] BUG: KASAN: use-after-free in nsim_dev_trap_report_work+0xa67/0xad0 [ 59.210247] Read of size 8 at addr ffff8883cbdd3398 by task kworker/3:1/38 [ 59.212584] [ 59.213148] CPU: 3 PID: 38 Comm: kworker/3:1 Not tainted 5.4.0-rc3-custom-16119-ge6abb5f0261e #2013 [ 59.215896] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20180724_192412-buildhw-07.phx2.fedoraproject.org-1.fc29 04/01/2014 [ 59.218384] Workqueue: events nsim_dev_trap_report_work [ 59.219428] Call Trace: [ 59.219924] dump_stack+0xa9/0x10e [ 59.220623] print_address_description.constprop.4+0x21/0x340 [ 59.221976] ? vprintk_func+0x66/0x240 [ 59.222752] __kasan_report.cold.8+0x78/0x91 [ 59.223602] ? 
nsim_dev_trap_report_work+0xa67/0xad0 [ 59.224603] kasan_report+0xe/0x20 [ 59.225296] nsim_dev_trap_report_work+0xa67/0xad0 [ 59.226435] ? rcu_read_lock_sched_held+0xaf/0xe0 [ 59.227512] ? trace_event_raw_event_rcu_quiescent_state_report+0x360/0x360 [ 59.228851] process_one_work+0x98f/0x1760 [ 59.229684] ? pwq_dec_nr_in_flight+0x330/0x330 [ 59.230656] worker_thread+0x91/0xc40 [ 59.231587] ? process_one_work+0x1760/0x1760 [ 59.232451] kthread+0x34a/0x410 [ 59.233104] ? __kthread_queue_delayed_work+0x240/0x240 [ 59.234141] ret_from_fork+0x3a/0x50 [ 59.234982] [ 59.235371] Allocated by task 187: [ 59.236189] save_stack+0x19/0x80 [ 59.236853] __kasan_kmalloc.constprop.5+0xc1/0xd0 [ 59.237822] kmem_cache_alloc_trace+0x14c/0x380 [ 59.238769] __nsim_dev_port_add+0xaf/0x5c0 [ 59.239627] nsim_dev_probe+0x4fc/0x1140 [ 59.240550] really_probe+0x264/0xc00 [ 59.241418] driver_probe_device+0x208/0x2e0 [ 59.242255] __device_attach_driver+0x215/0x2d0 [ 59.243150] bus_for_each_drv+0x154/0x1d0 [ 59.243944] __device_attach+0x1ba/0x2b0 [ 59.244923] bus_probe_device+0x1dd/0x290 [ 59.245805] device_add+0xbac/0x1550 [ 59.246528] new_device_store+0x1f4/0x400 [ 59.247306] bus_attr_store+0x7b/0xa0 [ 59.248047] sysfs_kf_write+0x10f/0x170 [ 59.248941] kernfs_fop_write+0x283/0x430 [ 59.249843] __vfs_write+0x81/0x100 [ 59.250546] vfs_write+0x1ce/0x510 [ 59.251190] ksys_write+0x104/0x200 [ 59.251873] do_syscall_64+0xa4/0x4e0 [ 59.252642] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 59.253837] [ 59.254203] Freed by task 187: [ 59.254811] save_stack+0x19/0x80 [ 59.255463] __kasan_slab_free+0x125/0x170 [ 59.256265] kfree+0x100/0x440 [ 59.256870] nsim_dev_remove+0x98/0x100 [ 59.257651] nsim_bus_remove+0x16/0x20 [ 59.258382] device_release_driver_internal+0x20b/0x4d0 [ 59.259588] bus_remove_device+0x2e9/0x5a0 [ 59.260551] device_del+0x410/0xad0 [ 59.263777] device_unregister+0x26/0xc0 [ 59.264616] nsim_bus_dev_del+0x16/0x60 [ 59.265381] del_device_store+0x2d6/0x3c0 [ 59.266295] bus_attr_store+0x7b/0xa0 
[ 59.267192] sysfs_kf_write+0x10f/0x170 [ 59.267960] kernfs_fop_write+0x283/0x430 [ 59.268800] __vfs_write+0x81/0x100 [ 59.269551] vfs_write+0x1ce/0x510 [ 59.270252] ksys_write+0x104/0x200 [ 59.270910] do_syscall_64+0xa4/0x4e0 [ 59.271680] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 59.272812] [ 59.273211] The buggy address belongs to the object at ffff8883cbdd3200 [ 59.273211] which belongs to the cache kmalloc-512 of size 512 [ 59.275838] The buggy address is located 408 bytes inside of [ 59.275838] 512-byte region [ffff8883cbdd3200, ffff8883cbdd3400) [ 59.278151] The buggy address belongs to the page: [ 59.279215] page:ffffea000f2f7400 refcount:1 mapcount:0 mapping:ffff8883ecc0ce00 index:0x0 compound_mapcount: 0 [ 59.281449] flags: 0x200000000010200(slab|head) [ 59.282356] raw: 0200000000010200 ffffea000f2f3a08 ffffea000f2fd608 ffff8883ecc0ce00 [ 59.283949] raw: 0000000000000000 0000000000150015 00000001ffffffff 0000000000000000 [ 59.285608] page dumped because: kasan: bad access detected [ 59.286981] [ 59.287337] Memory state around the buggy address: [ 59.288310] ffff8883cbdd3280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 59.289763] ffff8883cbdd3300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 59.291452] >ffff8883cbdd3380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 59.292945] ^ [ 59.293815] ffff8883cbdd3400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 59.295220] ffff8883cbdd3480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 59.296872] ================================================================== Fixes: da58f90f11f5 ("netdevsim: Add devlink-trap support") Signed-off-by: Ido Schimmel Reported-by: syzbot+9ed8f68ab30761f3678e@syzkaller.appspotmail.com Acked-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/netdevsim/dev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 56576d4f34a5..54ca6681ba31 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -806,9 +806,11 @@ static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev) { struct nsim_dev_port *nsim_dev_port, *tmp; + mutex_lock(&nsim_dev->port_list_lock); list_for_each_entry_safe(nsim_dev_port, tmp, &nsim_dev->port_list, list) __nsim_dev_port_del(nsim_dev_port); + mutex_unlock(&nsim_dev->port_list_lock); } int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) @@ -822,14 +824,17 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) return PTR_ERR(nsim_dev); dev_set_drvdata(&nsim_bus_dev->dev, nsim_dev); + mutex_lock(&nsim_dev->port_list_lock); for (i = 0; i < nsim_bus_dev->port_count; i++) { err = __nsim_dev_port_add(nsim_dev, i); if (err) goto err_port_del_all; } + mutex_unlock(&nsim_dev->port_list_lock); return 0; err_port_del_all: + mutex_unlock(&nsim_dev->port_list_lock); nsim_dev_port_del_all(nsim_dev); nsim_dev_destroy(nsim_dev); return err; -- cgit v1.2.3 From 6bd7cf66578fae18c26d92115058482cc74ca71b Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 1 Oct 2019 13:33:06 +0200 Subject: perf tools: Make usage of test_attr__* optional for perf-sys.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For users of perf-sys.h outside perf, e.g. samples/bpf/bpf_load.c, it's convenient not to depend on test_attr__*. After commit 91854f9a077e ("perf tools: Move everything related to sys_perf_event_open() to perf-sys.h"), all users of perf-sys.h will depend on test_attr__enabled and test_attr__open. This commit enables a user to define HAVE_ATTR_TEST to zero in order to omit the test dependency. 
Fixes: 91854f9a077e ("perf tools: Move everything related to sys_perf_event_open() to perf-sys.h") Signed-off-by: Björn Töpel Acked-by: Song Liu Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Namhyung Kim Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Daniel Borkmann Link: http://lore.kernel.org/bpf/20191001113307.27796-2-bjorn.topel@gmail.com --- tools/perf/perf-sys.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h index 63e4349a772a..15e458e150bd 100644 --- a/tools/perf/perf-sys.h +++ b/tools/perf/perf-sys.h @@ -15,7 +15,9 @@ void test_attr__init(void); void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, int fd, int group_fd, unsigned long flags); -#define HAVE_ATTR_TEST +#ifndef HAVE_ATTR_TEST +#define HAVE_ATTR_TEST 1 +#endif static inline int sys_perf_event_open(struct perf_event_attr *attr, @@ -27,7 +29,7 @@ sys_perf_event_open(struct perf_event_attr *attr, fd = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); -#ifdef HAVE_ATTR_TEST +#if HAVE_ATTR_TEST if (unlikely(test_attr__enabled)) test_attr__open(attr, pid, cpu, fd, group_fd, flags); #endif -- cgit v1.2.3 From 04ec044b7d30800296824783df7d9728d16d7567 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 1 Oct 2019 13:33:07 +0200 Subject: samples/bpf: fix build by setting HAVE_ATTR_TEST to zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To remove the dependency of perf-sys.h on test_attr__{enabled/open}, we set HAVE_ATTR_TEST to zero.
Signed-off-by: Björn Töpel Tested-by: KP Singh Acked-by: Song Liu Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Namhyung Kim Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Daniel Borkmann Link: http://lore.kernel.org/bpf/20191001113307.27796-3-bjorn.topel@gmail.com --- samples/bpf/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 1d9be26b4edd..42b571cde177 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -176,6 +176,7 @@ KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/ KBUILD_HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include KBUILD_HOSTCFLAGS += -I$(srctree)/tools/perf +KBUILD_HOSTCFLAGS += -DHAVE_ATTR_TEST=0 HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable -- cgit v1.2.3 From ff1c08e1f74b6864854c39be48aa799a6a2e4d2b Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 29 Oct 2019 16:43:07 +0100 Subject: bpf: Change size to u64 for bpf_map_{area_alloc, charge_init}() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The functions bpf_map_area_alloc() and bpf_map_charge_init() prior to this commit passed the size parameter as size_t. In this commit this is changed to u64. All users of these functions avoid size_t overflows on 32-bit systems, by explicitly using u64 when calculating the allocation size and memory charge cost. However, since the result was narrowed by the size_t when passing size and cost to the functions, the overflow handling was in vain. Instead of changing all call sites to size_t and handling overflow at the call site, the parameter is changed to u64 and checked in the functions above.
Fixes: d407bd25a204 ("bpf: don't trigger OOM killer under pressure with map alloc") Fixes: c85d69135a91 ("bpf: move memory size checks to bpf_map_charge_init()") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/20191029154307.23053-1-bjorn.topel@gmail.com --- include/linux/bpf.h | 4 ++-- kernel/bpf/syscall.c | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5b9d22338606..3bf3835d0e86 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -656,11 +656,11 @@ void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages); -int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size); +int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size); void bpf_map_charge_finish(struct bpf_map_memory *mem); void bpf_map_charge_move(struct bpf_map_memory *dst, struct bpf_map_memory *src); -void *bpf_map_area_alloc(size_t size, int numa_node); +void *bpf_map_area_alloc(u64 size, int numa_node); void bpf_map_area_free(void *base); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0937719b87e2..ace1cfaa24b6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -126,7 +126,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) return map; } -void *bpf_map_area_alloc(size_t size, int numa_node) +void *bpf_map_area_alloc(u64 size, int numa_node) { /* We really just want to fail instead of triggering OOM killer * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, @@ -141,6 +141,9 @@ void *bpf_map_area_alloc(size_t size, int numa_node) const gfp_t flags = __GFP_NOWARN | __GFP_ZERO; void *area; + if (size >= SIZE_MAX) + return NULL; + if (size <= (PAGE_SIZE << 
PAGE_ALLOC_COSTLY_ORDER)) { area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags, numa_node); @@ -197,7 +200,7 @@ static void bpf_uncharge_memlock(struct user_struct *user, u32 pages) atomic_long_sub(pages, &user->locked_vm); } -int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size) +int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size) { u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT; struct user_struct *user; -- cgit v1.2.3 From 19f92a030ca6d772ab44b22ee6a01378a8cb32d4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Oct 2019 09:36:20 -0700 Subject: net: increase SOMAXCONN to 4096 SOMAXCONN is /proc/sys/net/core/somaxconn default value. It has been defined as 128 more than 20 years ago. Since it caps the listen() backlog values, the very small value has caused numerous problems over the years, and many people had to raise it on their hosts after being hit by problems. Google has been using 1024 for at least 15 years, and we increased this to 4096 after TCP listener rework has been completed, more than 4 years ago. We got no complaints of this change breaking any legacy application. Many applications indeed set up a TCP listener with listen(fd, -1); meaning they let the system select the backlog. Raising SOMAXCONN lowers chance of the port being unavailable under even small SYNFLOOD attack, and reduces possibilities of side channel vulnerabilities. Signed-off-by: Eric Dumazet Cc: Willy Tarreau Cc: Yue Cao Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 4 ++-- include/linux/socket.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 49e95f438ed7..0e6653471c0e 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -207,8 +207,8 @@ TCP variables: somaxconn - INTEGER Limit of socket listen() backlog, known in userspace as SOMAXCONN. - Defaults to 128.
See also tcp_max_syn_backlog for additional tuning - for TCP sockets. + Defaults to 4096. (Was 128 before linux-5.4) + See also tcp_max_syn_backlog for additional tuning for TCP sockets. tcp_abort_on_overflow - BOOLEAN If listening service is too slow to accept new connections, diff --git a/include/linux/socket.h b/include/linux/socket.h index fc0bed59fc84..4049d9755cf1 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -263,7 +263,7 @@ struct ucred { #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ -#define SOMAXCONN 128 +#define SOMAXCONN 4096 /* Flags we can use with send/ and recv. Added those for 1003.1g not all are supported yet -- cgit v1.2.3 From 623d0c2db02043e43b698fdd8de1bd398b8e7b37 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Oct 2019 10:05:46 -0700 Subject: tcp: increase tcp_max_syn_backlog max value tcp_max_syn_backlog default value depends on memory size and TCP ehash size. Before this patch, the max value was 2048 [1], which is considered too small nowadays. Increase it to 4096 to match the recent SOMAXCONN change. [1] This is with TCP ehash size being capped to 524288 buckets. Signed-off-by: Eric Dumazet Cc: Willy Tarreau Cc: Yue Cao Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 7 +++++-- net/ipv4/tcp_ipv4.c | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 0e6653471c0e..8d4ad1d1ae26 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -408,11 +408,14 @@ tcp_max_orphans - INTEGER up to ~64K of unswappable memory. tcp_max_syn_backlog - INTEGER - Maximal number of remembered connection requests, which have not - received an acknowledgment from connecting client. + Maximal number of remembered connection requests (SYN_RECV), + which have not received an acknowledgment from connecting client. 
+ This is a per-listener limit. The minimal value is 128 for low memory machines, and it will increase in proportion to the memory of machine. If server suffers from overload, try increasing this number. + Remember to also check /proc/sys/net/core/somaxconn + A SYN_RECV request socket consumes about 304 bytes of memory. tcp_max_tw_buckets - INTEGER Maximal number of timewait sockets held by system simultaneously. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6be568334848..b74192695955 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2681,7 +2681,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2; net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; - net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256); + net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128); net->ipv4.sysctl_tcp_sack = 1; net->ipv4.sysctl_tcp_window_scaling = 1; net->ipv4.sysctl_tcp_timestamps = 1; -- cgit v1.2.3 From 94bc1e522b32c866d85b5af0ede55026b585ae73 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 22 Aug 2019 14:33:18 -0400 Subject: igb/igc: Don't warn on fatal read failures when the device is removed Fatal read errors are worth warning about, unless of course the device was just unplugged from the machine - something that's a rather normal occurrence when the igb/igc adapter is located on a Thunderbolt dock. So, let's only WARN() if there's a fatal read error while the device is still present. This fixes the following WARN splat that's been appearing whenever I unplug my Caldigit TS3 Thunderbolt dock from my laptop: igb 0000:09:00.0 enp9s0: PCIe link lost ------------[ cut here ]------------ igb: Failed to read reg 0x18! 
WARNING: CPU: 7 PID: 516 at drivers/net/ethernet/intel/igb/igb_main.c:756 igb_rd32+0x57/0x6a [igb] Modules linked in: igb dca thunderbolt fuse vfat fat elan_i2c mei_wdt mei_hdcp i915 wmi_bmof intel_wmi_thunderbolt iTCO_wdt iTCO_vendor_support x86_pkg_temp_thermal intel_powerclamp joydev coretemp crct10dif_pclmul crc32_pclmul i2c_algo_bit ghash_clmulni_intel intel_cstate drm_kms_helper intel_uncore syscopyarea sysfillrect sysimgblt fb_sys_fops intel_rapl_perf intel_xhci_usb_role_switch mei_me drm roles idma64 i2c_i801 ucsi_acpi typec_ucsi mei intel_lpss_pci processor_thermal_device typec intel_pch_thermal intel_soc_dts_iosf intel_lpss int3403_thermal thinkpad_acpi wmi int340x_thermal_zone ledtrig_audio int3400_thermal acpi_thermal_rel acpi_pad video pcc_cpufreq ip_tables serio_raw nvme nvme_core crc32c_intel uas usb_storage e1000e i2c_dev CPU: 7 PID: 516 Comm: kworker/u16:3 Not tainted 5.2.0-rc1Lyude-Test+ #14 Hardware name: LENOVO 20L8S2N800/20L8S2N800, BIOS N22ET35W (1.12 ) 04/09/2018 Workqueue: kacpi_hotplug acpi_hotplug_work_fn RIP: 0010:igb_rd32+0x57/0x6a [igb] Code: 87 b8 fc ff ff 48 c7 47 08 00 00 00 00 48 c7 c6 33 42 9b c0 4c 89 c7 e8 47 45 cd dc 89 ee 48 c7 c7 43 42 9b c0 e8 c1 94 71 dc <0f> 0b eb 08 8b 00 ff c0 75 b0 eb c8 44 89 e0 5d 41 5c c3 0f 1f 44 RSP: 0018:ffffba5801cf7c48 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff9e7956608840 RCX: 0000000000000007 RDX: 0000000000000000 RSI: ffffba5801cf7b24 RDI: ffff9e795e3d6a00 RBP: 0000000000000018 R08: 000000009dec4a01 R09: ffffffff9e61018f R10: 0000000000000000 R11: ffffba5801cf7ae5 R12: 00000000ffffffff R13: ffff9e7956608840 R14: ffff9e795a6f10b0 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff9e795e3c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000564317bc4088 CR3: 000000010e00a006 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: 
igb_release_hw_control+0x1a/0x30 [igb] igb_remove+0xc5/0x14b [igb] pci_device_remove+0x3b/0x93 device_release_driver_internal+0xd7/0x17e pci_stop_bus_device+0x36/0x75 pci_stop_bus_device+0x66/0x75 pci_stop_bus_device+0x66/0x75 pci_stop_and_remove_bus_device+0xf/0x19 trim_stale_devices+0xc5/0x13a ? __pm_runtime_resume+0x6e/0x7b trim_stale_devices+0x103/0x13a ? __pm_runtime_resume+0x6e/0x7b trim_stale_devices+0x103/0x13a acpiphp_check_bridge+0xd8/0xf5 acpiphp_hotplug_notify+0xf7/0x14b ? acpiphp_check_bridge+0xf5/0xf5 acpi_device_hotplug+0x357/0x3b5 acpi_hotplug_work_fn+0x1a/0x23 process_one_work+0x1a7/0x296 worker_thread+0x1a8/0x24c ? process_scheduled_works+0x2c/0x2c kthread+0xe9/0xee ? kthread_destroy_worker+0x41/0x41 ret_from_fork+0x35/0x40 ---[ end trace 252bf10352c63d22 ]--- Signed-off-by: Lyude Paul Fixes: 47e16692b26b ("igb/igc: warn when fatal read failure happens") Acked-by: Sasha Neftin Tested-by: Aaron Brown Acked-by: Feng Tang Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 3 ++- drivers/net/ethernet/intel/igc/igc_main.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 105b0624081a..31b9e02875cc 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -753,7 +753,8 @@ u32 igb_rd32(struct e1000_hw *hw, u32 reg) struct net_device *netdev = igb->netdev; hw->hw_addr = NULL; netdev_err(netdev, "PCIe link lost\n"); - WARN(1, "igb: Failed to read reg 0x%x!\n", reg); + WARN(pci_device_is_present(igb->pdev), + "igb: Failed to read reg 0x%x!\n", reg); } return value; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 63b62d74f961..8e424dfab12e 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -4047,7 +4047,8 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) hw->hw_addr = NULL; 
netif_device_detach(netdev); netdev_err(netdev, "PCIe link lost, device now detached\n"); - WARN(1, "igc: Failed to read reg 0x%x!\n", reg); + WARN(pci_device_is_present(igc->pdev), + "igc: Failed to read reg 0x%x!\n", reg); } return value; -- cgit v1.2.3 From fb2308ba16bf1fd2cc3635172381e265fbfcb76d Mon Sep 17 00:00:00 2001 From: Manfred Rudigier Date: Thu, 15 Aug 2019 13:55:19 -0700 Subject: igb: Enable media autosense for the i350. This patch enables the hardware feature "Media Auto Sense" also on the i350. It works in the same way as on the 82580 devices. Hardware designs using dual PHYs (fiber/copper) can enable this feature by setting the MAS enable bits in the NVM_COMPAT register (0x03) in the EEPROM. Signed-off-by: Manfred Rudigier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/e1000_82575.c | 2 +- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index 3ec2ce0725d5..8a6ef3514129 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -466,7 +466,7 @@ static s32 igb_init_mac_params_82575(struct e1000_hw *hw) ?
igb_setup_copper_link_82575 : igb_setup_serdes_link_82575; - if (mac->type == e1000_82580) { + if (mac->type == e1000_82580 || mac->type == e1000_i350) { switch (hw->device_id) { /* feature not supported on these id's */ case E1000_DEV_ID_DH89XXCC_SGMII: diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 31b9e02875cc..17a961c3d6e4 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2371,7 +2371,7 @@ void igb_reset(struct igb_adapter *adapter) adapter->ei.get_invariants(hw); adapter->flags &= ~IGB_FLAG_MEDIA_RESET; } - if ((mac->type == e1000_82575) && + if ((mac->type == e1000_82575 || mac->type == e1000_i350) && (adapter->flags & IGB_FLAG_MAS_ENABLE)) { igb_enable_mas(adapter); } -- cgit v1.2.3 From bfc97f9f199cb041cf897af3af096540948cc705 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 31 Oct 2019 14:47:23 -0700 Subject: arm64: apply ARM64_ERRATUM_845719 workaround for Brahma-B53 core The Broadcom Brahma-B53 core is susceptible to the issue described by ARM64_ERRATUM_845719 so this commit enables the workaround to be applied when executing on that core. Since there are now multiple entries to match, we must convert the existing ARM64_ERRATUM_845719 into an erratum list. Signed-off-by: Doug Berger Signed-off-by: Florian Fainelli Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 3 +++ arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/cpu_errata.c | 13 +++++++++++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 25d62272de73..189a1768e26a 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -91,6 +91,9 @@ stable kernels. 
| ARM | MMU-500 | #841119,826419 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ +| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 | ++----------------+-----------------+-----------------+-----------------------------+ ++----------------+-----------------+-----------------+-----------------------------+ | Cavium | ThunderX ITS | #22375,24313 | CAVIUM_ERRATUM_22375 | +----------------+-----------------+-----------------+-----------------------------+ | Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 | diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index b1454d117cd2..aca07c2f6e6e 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -79,6 +79,7 @@ #define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3 #define CAVIUM_CPU_PART_THUNDERX2 0x0AF +#define BRCM_CPU_PART_BRAHMA_B53 0x100 #define BRCM_CPU_PART_VULCAN 0x516 #define QCOM_CPU_PART_FALKOR_V1 0x800 @@ -105,6 +106,7 @@ #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) #define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2) +#define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53) #define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN) #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 091e3ec0f420..b5eeba7f5d84 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -743,6 +743,16 @@ static const struct midr_range erratum_1418040_list[] = { }; #endif +#ifdef 
CONFIG_ARM64_ERRATUM_845719 +static const struct midr_range erratum_845719_list[] = { + /* Cortex-A53 r0p[01234] */ + MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), + /* Brahma-B53 r0p[0] */ + MIDR_REV(MIDR_BRAHMA_B53, 0, 0), + {}, +}; +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -783,10 +793,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_ARM64_ERRATUM_845719 { - /* Cortex-A53 r0p[01234] */ .desc = "ARM erratum 845719", .capability = ARM64_WORKAROUND_845719, - ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), + ERRATA_MIDR_RANGE_LIST(erratum_845719_list), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_23154 -- cgit v1.2.3 From e059770cb1cdfbcbe3f1748f76005861cc79dd1a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 31 Oct 2019 14:47:24 -0700 Subject: arm64: Brahma-B53 is SSB and spectre v2 safe Add the Brahma-B53 CPU (all versions) to the whitelists of CPUs for the SSB and spectre v2 mitigations. 
Signed-off-by: Florian Fainelli Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b5eeba7f5d84..a1983c0a872b 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -489,6 +489,7 @@ static const struct midr_range arm64_ssb_cpus[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), {}, }; @@ -573,6 +574,7 @@ static const struct midr_range spectre_v2_safe_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), { /* sentinel */ } }; -- cgit v1.2.3 From 1cf45b8fdbb87040e1d1bd793891089f4678aa41 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 31 Oct 2019 14:47:25 -0700 Subject: arm64: apply ARM64_ERRATUM_843419 workaround for Brahma-B53 core The Broadcom Brahma-B53 core is susceptible to the issue described by ARM64_ERRATUM_843419 so this commit enables the workaround to be applied when executing on that core. Since there are now multiple entries to match, we must convert the existing ARM64_ERRATUM_843419 into an erratum list and use cpucap_multi_entry_cap_matches to match our entries. Signed-off-by: Florian Fainelli Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 2 ++ arch/arm64/kernel/cpu_errata.c | 23 ++++++++++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 189a1768e26a..5a09661330fc 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -93,6 +93,8 @@ stable kernels. 
+----------------+-----------------+-----------------+-----------------------------+ | Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 | +----------------+-----------------+-----------------+-----------------------------+ +| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_843419 | ++----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Cavium | ThunderX ITS | #22375,24313 | CAVIUM_ERRATUM_22375 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a1983c0a872b..93f34b4eca25 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -755,6 +755,23 @@ static const struct midr_range erratum_845719_list[] = { }; #endif +#ifdef CONFIG_ARM64_ERRATUM_843419 +static const struct arm64_cpu_capabilities erratum_843419_list[] = { + { + /* Cortex-A53 r0p[01234] */ + .matches = is_affected_midr_range, + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), + MIDR_FIXED(0x4, BIT(8)), + }, + { + /* Brahma-B53 r0p[0] */ + .matches = is_affected_midr_range, + ERRATA_MIDR_REV(MIDR_BRAHMA_B53, 0, 0), + }, + {}, +}; +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -786,11 +803,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_ARM64_ERRATUM_843419 { - /* Cortex-A53 r0p[01234] */ .desc = "ARM erratum 843419", .capability = ARM64_WORKAROUND_843419, - ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), - MIDR_FIXED(0x4, BIT(8)), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = cpucap_multi_entry_cap_matches, + .match_list = erratum_843419_list, }, #endif #ifdef CONFIG_ARM64_ERRATUM_845719 -- cgit v1.2.3 From be3df3dd4c70ee020587a943a31b98a0fb4b6424 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Oct 2019 18:40:32 -0400 Subject: 
NFSv4: Don't allow a cached open with a revoked delegation If the delegation is marked as being revoked, we must not use it for cached opens. Fixes: 869f9dfa4d6d ("NFSv4: Fix races between nfs_remove_bad_delegation() and delegation return") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 10 ++++++++++ fs/nfs/delegation.h | 1 + fs/nfs/nfs4proc.c | 7 ++----- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 071b90a45933..ccdfb5f98f35 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -53,6 +53,16 @@ nfs4_is_valid_delegation(const struct nfs_delegation *delegation, return false; } +struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode) +{ + struct nfs_delegation *delegation; + + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (nfs4_is_valid_delegation(delegation, 0)) + return delegation; + return NULL; +} + static int nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) { diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 9eb87ae4c982..8b14d441e699 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -68,6 +68,7 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred); bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode); +struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); int nfs4_have_delegation(struct inode *inode, fmode_t flags); int nfs4_check_delegation(struct inode *inode, fmode_t flags); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ab8ca20fd579..caacf5e7f5e1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1440,8 +1440,6 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t 
fmode, return 0; if ((delegation->type & fmode) != fmode) return 0; - if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) - return 0; switch (claim) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_FH: @@ -1810,7 +1808,6 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) { struct nfs4_state *state = opendata->state; - struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_delegation *delegation; int open_mode = opendata->o_arg.open_flags; fmode_t fmode = opendata->o_arg.fmode; @@ -1827,7 +1824,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) } spin_unlock(&state->owner->so_lock); rcu_read_lock(); - delegation = rcu_dereference(nfsi->delegation); + delegation = nfs4_get_valid_delegation(state->inode); if (!can_open_delegated(delegation, fmode, claim)) { rcu_read_unlock(); break; @@ -2371,7 +2368,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) data->o_arg.open_flags, claim)) goto out_no_action; rcu_read_lock(); - delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); + delegation = nfs4_get_valid_delegation(data->state->inode); if (can_open_delegated(delegation, data->o_arg.fmode, claim)) goto unlock_no_action; rcu_read_unlock(); -- cgit v1.2.3 From 79cc55422ce99be5964bde208ba8557174720893 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Oct 2019 18:40:33 -0400 Subject: NFS: Fix an RCU lock leak in nfs4_refresh_delegation_stateid() A typo in nfs4_refresh_delegation_stateid() means we're leaking an RCU lock, and always returning a value of 'false'. As the function description states, we were always supposed to return 'true' if a matching delegation was found. 
Fixes: 12f275cdd163 ("NFSv4: Retry CLOSE and DELEGRETURN on NFS4ERR_OLD_STATEID.") Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index ccdfb5f98f35..af549d70ec50 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1191,7 +1191,7 @@ bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode) if (delegation != NULL && nfs4_stateid_match_other(dst, &delegation->stateid)) { dst->seqid = delegation->stateid.seqid; - return ret; + ret = true; } rcu_read_unlock(); out: -- cgit v1.2.3 From 4202e219edd6cc164c042e16fa327525410705ae Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Fri, 1 Nov 2019 20:17:25 +0800 Subject: net: ethernet: arc: add the missed clk_disable_unprepare The remove callback fails to disable and unprepare priv->macclk, as is done when probe fails. Add the missing call in remove. Signed-off-by: Chuhong Yuan Signed-off-by: David S.
Miller --- drivers/net/ethernet/arc/emac_rockchip.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c index 42d2e1b02c44..664d664e0925 100644 --- a/drivers/net/ethernet/arc/emac_rockchip.c +++ b/drivers/net/ethernet/arc/emac_rockchip.c @@ -256,6 +256,9 @@ static int emac_rockchip_remove(struct platform_device *pdev) if (priv->regulator) regulator_disable(priv->regulator); + if (priv->soc_data->need_div_macclk) + clk_disable_unprepare(priv->macclk); + free_netdev(ndev); return err; } -- cgit v1.2.3 From 8d5cfd7f76a2414e23c74bb8858af7540365d985 Mon Sep 17 00:00:00 2001 From: Manfred Rudigier Date: Thu, 15 Aug 2019 13:55:20 -0700 Subject: igb: Fix constant media auto sense switching when no cable is connected At least on the i350 there is an annoying behavior that is maybe also present on 82580 devices, but was probably not noticed yet as MAS is not widely used. If no cable is connected on both fiber/copper ports the media auto sense code will constantly swap between them as part of the watchdog task and produce many unnecessary kernel log messages. The swap code responsible for this behavior (switching to fiber) should not be executed if the current media type is copper and there is no signal detected on the fiber port. In this case we can safely wait until the AUTOSENSE_EN bit is cleared. 
Signed-off-by: Manfred Rudigier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 17a961c3d6e4..9148c62d9ac5 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2065,7 +2065,8 @@ static void igb_check_swap_media(struct igb_adapter *adapter) if ((hw->phy.media_type == e1000_media_type_copper) && (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) { swap_now = true; - } else if (!(connsw & E1000_CONNSW_SERDESD)) { + } else if ((hw->phy.media_type != e1000_media_type_copper) && + !(connsw & E1000_CONNSW_SERDESD)) { /* copper signal takes time to appear */ if (adapter->copper_tries < 4) { adapter->copper_tries++; -- cgit v1.2.3 From 2c19e395e061a1c1442e0623ce5ec88ecc6c5a9b Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Mon, 7 Oct 2019 15:07:24 -0700 Subject: i40e: Fix receive buffer starvation for AF_XDP Magnus's fix to resolve a potential receive buffer starvation for AF_XDP got applied to both the i40e_xsk_umem_enable/disable() functions, when it should have only been applied to the "enable". So clean up the undesired code in the disable function. 
CC: Magnus Karlsson Fixes: 1f459bdc2007 ("i40e: fix potential RX buffer starvation for AF_XDP") Signed-off-by: Jeff Kirsher Tested-by: Andrew Bowers --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index b1c3227ae4ab..a05dfecdd9b4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -157,11 +157,6 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid) err = i40e_queue_pair_enable(vsi, qid); if (err) return err; - - /* Kick start the NAPI context so that receiving will start */ - err = i40e_xsk_wakeup(vsi->netdev, qid, XDP_WAKEUP_RX); - if (err) - return err; } return 0; -- cgit v1.2.3 From 8472ba62154058b64ebb83d5f57259a352d28697 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Mon, 12 Aug 2019 00:59:21 -0500 Subject: e1000: fix memory leaks In e1000_set_ringparam(), 'tx_old' and 'rx_old' are not deallocated if e1000_up() fails, leading to memory leaks. Refactor the code to fix this issue. 
Signed-off-by: Wenwen Wang Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index 71d3d8854d8f..be56e631d693 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -607,6 +607,7 @@ static int e1000_set_ringparam(struct net_device *netdev, for (i = 0; i < adapter->num_rx_queues; i++) rxdr[i].count = rxdr->count; + err = 0; if (netif_running(adapter->netdev)) { /* Try to get new resources before deleting old */ err = e1000_setup_all_rx_resources(adapter); @@ -627,14 +628,13 @@ static int e1000_set_ringparam(struct net_device *netdev, adapter->rx_ring = rxdr; adapter->tx_ring = txdr; err = e1000_up(adapter); - if (err) - goto err_setup; } kfree(tx_old); kfree(rx_old); clear_bit(__E1000_RESETTING, &adapter->flags); - return 0; + return err; + err_setup_tx: e1000_free_all_rx_resources(adapter); err_setup_rx: @@ -646,7 +646,6 @@ err_alloc_rx: err_alloc_tx: if (netif_running(adapter->netdev)) e1000_up(adapter); -err_setup: clear_bit(__E1000_RESETTING, &adapter->flags); return err; } -- cgit v1.2.3 From 17df5ae1b3e186338c6f584eaa32a9eed5460991 Mon Sep 17 00:00:00 2001 From: Jonathan Neuschäfer Date: Wed, 2 Oct 2019 17:09:55 +0200 Subject: Documentation: networking: device drivers: Remove stray asterisks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These asterisks were once references to a line that said: "* Other names and brands may be claimed as the property of others." But now, they serve no purpose; they can only irritate the reader. 
Fixes: de3edab4276c ("e1000: update README for e1000") Fixes: a3fb65680f65 ("e100.txt: Cleanup license info in kernel doc") Fixes: da8c01c4502a ("e1000e.txt: Add e1000e documentation") Fixes: f12a84a9f650 ("Documentation: fm10k: Add kernel documentation") Fixes: b55c52b1938c ("igb.txt: Add igb documentation") Fixes: c4e9b56e2442 ("igbvf.txt: Add igbvf Documentation") Fixes: d7064f4c192c ("Documentation/networking/: Update Intel wired LAN driver documentation") Fixes: c4b8c01112a1 ("ixgbevf.txt: Update ixgbevf documentation") Fixes: 1e06edcc2f22 ("Documentation: i40e: Prepare documentation for RST conversion") Fixes: 105bf2fe6b32 ("i40evf: add driver to kernel build system") Fixes: 1fae869bcf3d ("Documentation: ice: Prepare documentation for RST conversion") Fixes: df69ba43217d ("ionic: Add basic framework for IONIC Network device driver") Signed-off-by: Jonathan Neuschäfer Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- Documentation/networking/device_drivers/intel/e100.rst | 14 +++++++------- Documentation/networking/device_drivers/intel/e1000.rst | 12 ++++++------ Documentation/networking/device_drivers/intel/e1000e.rst | 14 +++++++------- Documentation/networking/device_drivers/intel/fm10k.rst | 10 +++++----- Documentation/networking/device_drivers/intel/i40e.rst | 8 ++++---- Documentation/networking/device_drivers/intel/iavf.rst | 8 ++++---- Documentation/networking/device_drivers/intel/ice.rst | 6 +++--- Documentation/networking/device_drivers/intel/igb.rst | 12 ++++++------ Documentation/networking/device_drivers/intel/igbvf.rst | 6 +++--- Documentation/networking/device_drivers/intel/ixgbe.rst | 10 +++++----- Documentation/networking/device_drivers/intel/ixgbevf.rst | 6 +++--- Documentation/networking/device_drivers/pensando/ionic.rst | 6 +++--- 12 files changed, 56 insertions(+), 56 deletions(-) diff --git a/Documentation/networking/device_drivers/intel/e100.rst b/Documentation/networking/device_drivers/intel/e100.rst index 2b9f4887beda..caf023cc88de 
100644 --- a/Documentation/networking/device_drivers/intel/e100.rst +++ b/Documentation/networking/device_drivers/intel/e100.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0+ -============================================================== -Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters -============================================================== +============================================================= +Linux Base Driver for the Intel(R) PRO/100 Family of Adapters +============================================================= June 1, 2018 @@ -21,7 +21,7 @@ Contents In This Release =============== -This file describes the Linux* Base Driver for the Intel(R) PRO/100 Family of +This file describes the Linux Base Driver for the Intel(R) PRO/100 Family of Adapters. This driver includes support for Itanium(R)2-based systems. For questions related to hardware requirements, refer to the documentation @@ -138,9 +138,9 @@ version 1.6 or later is required for this functionality. The latest release of ethtool can be found from https://www.kernel.org/pub/software/network/ethtool/ -Enabling Wake on LAN* (WoL) ---------------------------- -WoL is provided through the ethtool* utility. For instructions on +Enabling Wake on LAN (WoL) +-------------------------- +WoL is provided through the ethtool utility. For instructions on enabling WoL with ethtool, refer to the ethtool man page. WoL will be enabled on the system during the next shut down or reboot. For this driver version, in order to enable WoL, the e100 driver must be loaded diff --git a/Documentation/networking/device_drivers/intel/e1000.rst b/Documentation/networking/device_drivers/intel/e1000.rst index 956560b6e745..4aaae0f7d6ba 100644 --- a/Documentation/networking/device_drivers/intel/e1000.rst +++ b/Documentation/networking/device_drivers/intel/e1000.rst @@ -1,8 +1,8 @@ .. 
SPDX-License-Identifier: GPL-2.0+ -=========================================================== -Linux* Base Driver for Intel(R) Ethernet Network Connection -=========================================================== +========================================================== +Linux Base Driver for Intel(R) Ethernet Network Connection +========================================================== Intel Gigabit Linux driver. Copyright(c) 1999 - 2013 Intel Corporation. @@ -438,10 +438,10 @@ ethtool The latest release of ethtool can be found from https://www.kernel.org/pub/software/network/ethtool/ -Enabling Wake on LAN* (WoL) ---------------------------- +Enabling Wake on LAN (WoL) +-------------------------- - WoL is configured through the ethtool* utility. + WoL is configured through the ethtool utility. WoL will be enabled on the system during the next shut down or reboot. For this driver version, in order to enable WoL, the e1000 driver must be diff --git a/Documentation/networking/device_drivers/intel/e1000e.rst b/Documentation/networking/device_drivers/intel/e1000e.rst index 01999f05509c..f49cd370e7bf 100644 --- a/Documentation/networking/device_drivers/intel/e1000e.rst +++ b/Documentation/networking/device_drivers/intel/e1000e.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0+ -====================================================== -Linux* Driver for Intel(R) Ethernet Network Connection -====================================================== +===================================================== +Linux Driver for Intel(R) Ethernet Network Connection +===================================================== Intel Gigabit Linux driver. Copyright(c) 2008-2018 Intel Corporation. @@ -338,7 +338,7 @@ and higher cannot be forced. Use the autonegotiation advertising setting to manually set devices for 1 Gbps and higher. Speed, duplex, and autonegotiation advertising are configured through the -ethtool* utility. +ethtool utility. 
Caution: Only experienced network administrators should force speed and duplex or change autonegotiation advertising manually. The settings at the switch must @@ -351,9 +351,9 @@ will not attempt to auto-negotiate with its link partner since those adapters operate only in full duplex and only at their native speed. -Enabling Wake on LAN* (WoL) ---------------------------- -WoL is configured through the ethtool* utility. +Enabling Wake on LAN (WoL) +-------------------------- +WoL is configured through the ethtool utility. WoL will be enabled on the system during the next shut down or reboot. For this driver version, in order to enable WoL, the e1000e driver must be loaded diff --git a/Documentation/networking/device_drivers/intel/fm10k.rst b/Documentation/networking/device_drivers/intel/fm10k.rst index ac3269e34f55..4d279e64e221 100644 --- a/Documentation/networking/device_drivers/intel/fm10k.rst +++ b/Documentation/networking/device_drivers/intel/fm10k.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0+ -============================================================== -Linux* Base Driver for Intel(R) Ethernet Multi-host Controller -============================================================== +============================================================= +Linux Base Driver for Intel(R) Ethernet Multi-host Controller +============================================================= August 20, 2018 Copyright(c) 2015-2018 Intel Corporation. 
@@ -120,8 +120,8 @@ rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 m|v|t|s|d|f|n|r Known Issues/Troubleshooting ============================ -Enabling SR-IOV in a 64-bit Microsoft* Windows Server* 2012/R2 guest OS under Linux KVM ---------------------------------------------------------------------------------------- +Enabling SR-IOV in a 64-bit Microsoft Windows Server 2012/R2 guest OS under Linux KVM +------------------------------------------------------------------------------------- KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM. This includes traditional PCIe devices, as well as SR-IOV-capable devices based on the Intel Ethernet Controller XL710. diff --git a/Documentation/networking/device_drivers/intel/i40e.rst b/Documentation/networking/device_drivers/intel/i40e.rst index 848fd388fa6e..8a9b18573688 100644 --- a/Documentation/networking/device_drivers/intel/i40e.rst +++ b/Documentation/networking/device_drivers/intel/i40e.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0+ -================================================================== -Linux* Base Driver for the Intel(R) Ethernet Controller 700 Series -================================================================== +================================================================= +Linux Base Driver for the Intel(R) Ethernet Controller 700 Series +================================================================= Intel 40 Gigabit Linux driver. Copyright(c) 1999-2018 Intel Corporation. @@ -384,7 +384,7 @@ NOTE: You cannot set the speed for devices based on the Intel(R) Ethernet Network Adapter XXV710 based devices. Speed, duplex, and autonegotiation advertising are configured through the -ethtool* utility. +ethtool utility. Caution: Only experienced network administrators should force speed and duplex or change autonegotiation advertising manually. 
The settings at the switch must diff --git a/Documentation/networking/device_drivers/intel/iavf.rst b/Documentation/networking/device_drivers/intel/iavf.rst index cfc08842e32c..84ac7e75f363 100644 --- a/Documentation/networking/device_drivers/intel/iavf.rst +++ b/Documentation/networking/device_drivers/intel/iavf.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0+ -================================================================== -Linux* Base Driver for Intel(R) Ethernet Adaptive Virtual Function -================================================================== +================================================================= +Linux Base Driver for Intel(R) Ethernet Adaptive Virtual Function +================================================================= Intel Ethernet Adaptive Virtual Function Linux driver. Copyright(c) 2013-2018 Intel Corporation. @@ -19,7 +19,7 @@ Contents Overview ======== -This file describes the iavf Linux* Base Driver. This driver was formerly +This file describes the iavf Linux Base Driver. This driver was formerly called i40evf. The iavf driver supports the below mentioned virtual function devices and diff --git a/Documentation/networking/device_drivers/intel/ice.rst b/Documentation/networking/device_drivers/intel/ice.rst index c220aa2711c6..ee43ea57d443 100644 --- a/Documentation/networking/device_drivers/intel/ice.rst +++ b/Documentation/networking/device_drivers/intel/ice.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0+ -=================================================================== -Linux* Base Driver for the Intel(R) Ethernet Connection E800 Series -=================================================================== +================================================================== +Linux Base Driver for the Intel(R) Ethernet Connection E800 Series +================================================================== Intel ice Linux driver. Copyright(c) 2018 Intel Corporation. 
diff --git a/Documentation/networking/device_drivers/intel/igb.rst b/Documentation/networking/device_drivers/intel/igb.rst index fc8cfaa5dcfa..87e560fe5eaa 100644 --- a/Documentation/networking/device_drivers/intel/igb.rst +++ b/Documentation/networking/device_drivers/intel/igb.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0+ -=========================================================== -Linux* Base Driver for Intel(R) Ethernet Network Connection -=========================================================== +========================================================== +Linux Base Driver for Intel(R) Ethernet Network Connection +========================================================== Intel Gigabit Linux driver. Copyright(c) 1999-2018 Intel Corporation. @@ -129,9 +129,9 @@ version is required for this functionality. Download it at: https://www.kernel.org/pub/software/network/ethtool/ -Enabling Wake on LAN* (WoL) ---------------------------- -WoL is configured through the ethtool* utility. +Enabling Wake on LAN (WoL) +-------------------------- +WoL is configured through the ethtool utility. WoL will be enabled on the system during the next shut down or reboot. For this driver version, in order to enable WoL, the igb driver must be loaded diff --git a/Documentation/networking/device_drivers/intel/igbvf.rst b/Documentation/networking/device_drivers/intel/igbvf.rst index 9cddabe8108e..557fc020ef31 100644 --- a/Documentation/networking/device_drivers/intel/igbvf.rst +++ b/Documentation/networking/device_drivers/intel/igbvf.rst @@ -1,8 +1,8 @@ .. 
SPDX-License-Identifier: GPL-2.0+ -============================================================ -Linux* Base Virtual Function Driver for Intel(R) 1G Ethernet -============================================================ +=========================================================== +Linux Base Virtual Function Driver for Intel(R) 1G Ethernet +=========================================================== Intel Gigabit Virtual Function Linux driver. Copyright(c) 1999-2018 Intel Corporation. diff --git a/Documentation/networking/device_drivers/intel/ixgbe.rst b/Documentation/networking/device_drivers/intel/ixgbe.rst index c7d25483fedb..f1d5233e5e51 100644 --- a/Documentation/networking/device_drivers/intel/ixgbe.rst +++ b/Documentation/networking/device_drivers/intel/ixgbe.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0+ -============================================================================= -Linux* Base Driver for the Intel(R) Ethernet 10 Gigabit PCI Express Adapters -============================================================================= +=========================================================================== +Linux Base Driver for the Intel(R) Ethernet 10 Gigabit PCI Express Adapters +=========================================================================== Intel 10 Gigabit Linux driver. Copyright(c) 1999-2018 Intel Corporation. @@ -519,8 +519,8 @@ The offload is also supported for ixgbe's VFs, but the VF must be set as Known Issues/Troubleshooting ============================ -Enabling SR-IOV in a 64-bit Microsoft* Windows Server* 2012/R2 guest OS ------------------------------------------------------------------------ +Enabling SR-IOV in a 64-bit Microsoft Windows Server 2012/R2 guest OS +--------------------------------------------------------------------- Linux KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM. 
This includes traditional PCIe devices, as well as SR-IOV-capable devices based on the Intel Ethernet Controller XL710. diff --git a/Documentation/networking/device_drivers/intel/ixgbevf.rst b/Documentation/networking/device_drivers/intel/ixgbevf.rst index 5d4977360157..76bbde736f21 100644 --- a/Documentation/networking/device_drivers/intel/ixgbevf.rst +++ b/Documentation/networking/device_drivers/intel/ixgbevf.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0+ -============================================================= -Linux* Base Virtual Function Driver for Intel(R) 10G Ethernet -============================================================= +============================================================ +Linux Base Virtual Function Driver for Intel(R) 10G Ethernet +============================================================ Intel 10 Gigabit Virtual Function Linux driver. Copyright(c) 1999-2018 Intel Corporation. diff --git a/Documentation/networking/device_drivers/pensando/ionic.rst b/Documentation/networking/device_drivers/pensando/ionic.rst index 13935896bee6..c17d680cf334 100644 --- a/Documentation/networking/device_drivers/pensando/ionic.rst +++ b/Documentation/networking/device_drivers/pensando/ionic.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0+ -========================================================== -Linux* Driver for the Pensando(R) Ethernet adapter family -========================================================== +======================================================== +Linux Driver for the Pensando(R) Ethernet adapter family +======================================================== Pensando Linux Ethernet driver. Copyright(c) 2019 Pensando Systems, Inc -- cgit v1.2.3 From 451fe015b2857de3d8027ef606284a205e177724 Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Thu, 3 Oct 2019 23:53:57 -0700 Subject: ixgbe: Remove duplicate clear_bit() call __IXGBE_RX_BUILD_SKB_ENABLED bit is already cleared. 
Signed-off-by: Igor Pylypiv Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 1ce2397306b9..91b3780ddb04 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4310,7 +4310,6 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) if (test_bit(__IXGBE_RX_FCOE, &rx_ring->state)) set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); - clear_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state); if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY) continue; -- cgit v1.2.3 From a904a0693c189691eeee64f6c6b188bd7dc244e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Nov 2019 10:32:19 -0700 Subject: inet: stop leaking jiffies on the wire Historically linux tried to stick to RFC 791, 1122, 2003 for IPv4 ID field generation. RFC 6864 made clear that no matter how hard we try, we can not ensure unicity of IP ID within maximum lifetime for all datagrams with a given source address/destination address/protocol tuple. Linux uses a per socket inet generator (inet_id), initialized at connection startup with a XOR of 'jiffies' and other fields that appear clear on the wire. Thiemo Nagel pointed that this strategy is a privacy concern as this provides 16 bits of entropy to fingerprint devices. Let's switch to a random starting point, this is just as good as far as RFC 6864 is concerned and does not leak anything critical. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: Thiemo Nagel Signed-off-by: David S. 
Miller --- drivers/crypto/chelsio/chtls/chtls_cm.c | 2 +- net/dccp/ipv4.c | 2 +- net/ipv4/datagram.c | 2 +- net/ipv4/tcp_ipv4.c | 4 ++-- net/sctp/socket.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index 774d991d7cca..aca75237bbcf 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -1297,7 +1297,7 @@ static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt) tp->write_seq = snd_isn; tp->snd_nxt = snd_isn; tp->snd_una = snd_isn; - inet_sk(sk)->inet_id = tp->write_seq ^ jiffies; + inet_sk(sk)->inet_id = prandom_u32(); assign_rxopt(sk, opt); if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10)) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d9b4200ed12d..0d8f782c25cc 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -117,7 +117,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr, inet->inet_sport, inet->inet_dport); - inet->inet_id = dp->dccps_iss ^ jiffies; + inet->inet_id = prandom_u32(); err = dccp_connect(sk); rt = NULL; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 9a0fe0c2fa02..4a8550c49202 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -73,7 +73,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len reuseport_has_conns(sk, true); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); - inet->inet_id = jiffies; + inet->inet_id = prandom_u32(); sk_dst_set(sk, &rt->dst); err = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b74192695955..67b2dc7a1727 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -303,7 +303,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr); } - inet->inet_id = tp->write_seq ^ jiffies; + inet->inet_id = prandom_u32(); if (tcp_fastopen_defer_connect(sk, &err)) return err; @@ -1450,7 +1450,7 @@ struct 
sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; - newinet->inet_id = newtp->write_seq ^ jiffies; + newinet->inet_id = prandom_u32(); if (!dst) { dst = inet_csk_route_child_sock(sk, newsk, req); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ca81e06df165..ffd3262b7a41 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9306,7 +9306,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newinet->inet_rcv_saddr = inet->inet_rcv_saddr; newinet->inet_dport = htons(asoc->peer.port); newinet->pmtudisc = inet->pmtudisc; - newinet->inet_id = asoc->next_tsn ^ jiffies; + newinet->inet_id = prandom_u32(); newinet->uc_ttl = inet->uc_ttl; newinet->mc_loop = 1; -- cgit v1.2.3 From 9cfeeb576d49a7b5e643b8066ba64a55e8417c5d Mon Sep 17 00:00:00 2001 From: Yangchun Fu Date: Fri, 1 Nov 2019 10:09:56 -0700 Subject: gve: Fixes DMA synchronization. Syncs the DMA buffer properly in order for CPU and device to see the most up-to-date data. Signed-off-by: Yangchun Fu Reviewed-by: Catherine Sullivan Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_rx.c | 2 ++ drivers/net/ethernet/google/gve/gve_tx.c | 24 ++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 59564ac99d2a..edec61dfc868 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -289,6 +289,8 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc, len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD; page_info = &rx->data.page_info[idx]; + dma_sync_single_for_cpu(&priv->pdev->dev, rx->data.qpl->page_buses[idx], + PAGE_SIZE, DMA_FROM_DEVICE); /* gvnic can only receive into registered segments.
If the buffer * can't be recycled, our only choice is to copy the data out of diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 778b87b5a06c..0a9a7ee2a866 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -390,7 +390,21 @@ static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc, seg_desc->seg.seg_addr = cpu_to_be64(addr); } -static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb) +static void gve_dma_sync_for_device(struct device *dev, dma_addr_t *page_buses, + u64 iov_offset, u64 iov_len) +{ + dma_addr_t dma; + u64 addr; + + for (addr = iov_offset; addr < iov_offset + iov_len; + addr += PAGE_SIZE) { + dma = page_buses[addr / PAGE_SIZE]; + dma_sync_single_for_device(dev, dma, PAGE_SIZE, DMA_TO_DEVICE); + } +} + +static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb, + struct device *dev) { int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset; union gve_tx_desc *pkt_desc, *seg_desc; @@ -432,6 +446,9 @@ static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb) skb_copy_bits(skb, 0, tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset, hlen); + gve_dma_sync_for_device(dev, tx->tx_fifo.qpl->page_buses, + info->iov[hdr_nfrags - 1].iov_offset, + info->iov[hdr_nfrags - 1].iov_len); copy_offset = hlen; for (i = payload_iov; i < payload_nfrags + payload_iov; i++) { @@ -445,6 +462,9 @@ static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb) skb_copy_bits(skb, copy_offset, tx->tx_fifo.base + info->iov[i].iov_offset, info->iov[i].iov_len); + gve_dma_sync_for_device(dev, tx->tx_fifo.qpl->page_buses, + info->iov[i].iov_offset, + info->iov[i].iov_len); copy_offset += info->iov[i].iov_len; } @@ -473,7 +493,7 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev) gve_tx_put_doorbell(priv, tx->q_resources, tx->req); return NETDEV_TX_BUSY; } - nsegs = gve_tx_add_skb(tx, skb); + nsegs = 
gve_tx_add_skb(tx, skb, &priv->pdev->dev); netdev_tx_sent_queue(tx->netdev_txq, skb->len); skb_tx_timestamp(skb); -- cgit v1.2.3 From 9d68db5092c5fac99fccfdeab3f04df0b27d1762 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 31 Oct 2019 15:42:26 -0700 Subject: net: phylink: Fix phylink_dbg() macro The phylink_dbg() macro does not follow dynamic debug or defined(DEBUG) and as a result, it spams the kernel log since a PR_DEBUG level is currently used. Fix it to be defined appropriately whether CONFIG_DYNAMIC_DEBUG or defined(DEBUG) are set. Fixes: 17091180b152 ("net: phylink: Add phylink_{printk, err, warn, info, dbg} macros") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 20e2ebe458f2..a578f7ebf715 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -87,8 +87,24 @@ struct phylink { phylink_printk(KERN_WARNING, pl, fmt, ##__VA_ARGS__) #define phylink_info(pl, fmt, ...) \ phylink_printk(KERN_INFO, pl, fmt, ##__VA_ARGS__) +#if defined(CONFIG_DYNAMIC_DEBUG) #define phylink_dbg(pl, fmt, ...) \ +do { \ + if ((pl)->config->type == PHYLINK_NETDEV) \ + netdev_dbg((pl)->netdev, fmt, ##__VA_ARGS__); \ + else if ((pl)->config->type == PHYLINK_DEV) \ + dev_dbg((pl)->dev, fmt, ##__VA_ARGS__); \ +} while (0) +#elif defined(DEBUG) +#define phylink_dbg(pl, fmt, ...) \ phylink_printk(KERN_DEBUG, pl, fmt, ##__VA_ARGS__) +#else +#define phylink_dbg(pl, fmt, ...) 
\ +({ \ + if (0) \ + phylink_printk(KERN_DEBUG, pl, fmt, ##__VA_ARGS__); \ +}) +#endif /** * phylink_set_port_modes() - set the port type modes in the ethtool mask -- cgit v1.2.3 From 5fc0f21246e50afdf318b5a3a941f7f4f57b8947 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 31 Oct 2019 15:54:05 -0700 Subject: net: dsa: bcm_sf2: Fix IMP setup for port different than 8 Since it became possible for the DSA core to use a CPU port different than 8, our bcm_sf2_imp_setup() function was broken because it assumes that registers are applicable to port 8. In particular, the port's MAC is going to stay disabled, so make sure we clear the RX_DIS and TX_DIS bits if we are not configured for port 8. Fixes: 9f91484f6fcc ("net: dsa: make "label" property optional for dsa2") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 26509fa37a50..d44651ad520c 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -37,22 +37,11 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) unsigned int i; u32 reg, offset; - if (priv->type == BCM7445_DEVICE_ID) - offset = CORE_STS_OVERRIDE_IMP; - else - offset = CORE_STS_OVERRIDE_IMP2; - /* Enable the port memories */ reg = core_readl(priv, CORE_MEM_PSM_VDD_CTRL); reg &= ~P_TXQ_PSM_VDD(port); core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL); - /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ - reg = core_readl(priv, CORE_IMP_CTL); - reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN); - reg &= ~(RX_DIS | TX_DIS); - core_writel(priv, reg, CORE_IMP_CTL); - /* Enable forwarding */ core_writel(priv, SW_FWDG_EN, CORE_SWMODE); @@ -71,10 +60,27 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) b53_brcm_hdr_setup(ds, port); - /* Force link status for IMP port */ - reg = core_readl(priv, 
offset); - reg |= (MII_SW_OR | LINK_STS); - core_writel(priv, reg, offset); + if (port == 8) { + if (priv->type == BCM7445_DEVICE_ID) + offset = CORE_STS_OVERRIDE_IMP; + else + offset = CORE_STS_OVERRIDE_IMP2; + + /* Force link status for IMP port */ + reg = core_readl(priv, offset); + reg |= (MII_SW_OR | LINK_STS); + core_writel(priv, reg, offset); + + /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ + reg = core_readl(priv, CORE_IMP_CTL); + reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN); + reg &= ~(RX_DIS | TX_DIS); + core_writel(priv, reg, CORE_IMP_CTL); + } else { + reg = core_readl(priv, CORE_G_PCTL_PORT(port)); + reg &= ~(RX_DIS | TX_DIS); + core_writel(priv, reg, CORE_G_PCTL_PORT(port)); + } } static void bcm_sf2_gphy_enable_set(struct dsa_switch *ds, bool enable) -- cgit v1.2.3 From 62bdc8fd1c21d4263ebd18bec57f82532d09249f Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 1 Nov 2019 00:10:21 +0100 Subject: r8169: fix wrong PHY ID issue with RTL8168dp As reported in [0] at least one RTL8168dp version has problems establishing a link. This chip version has an integrated RTL8211b PHY, however the chip seems to report a wrong PHY ID, resulting in a wrong PHY driver (for Generic Realtek PHY) being loaded. Work around this issue by adding a hook to r8168dp_2_mdio_read() for returning the correct PHY ID. [0] https://bbs.archlinux.org/viewtopic.php?id=246508 Fixes: 242cd9b5866a ("r8169: use phy_resume/phy_suspend") Signed-off-by: Heiner Kallweit Signed-off-by: David S. 
Miller --- drivers/net/ethernet/realtek/r8169_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 350b0d949611..5064c292b873 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1029,6 +1029,10 @@ static int r8168dp_2_mdio_read(struct rtl8169_private *tp, int reg) { int value; + /* Work around issue with chip reporting wrong PHY ID */ + if (reg == MII_PHYSID2) + return 0xc912; + r8168dp_2_mdio_start(tp); value = r8169_mdio_read(tp, reg); -- cgit v1.2.3 From d64479a3e3f9924074ca7b50bd72fa5211dca9c1 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 31 Oct 2019 16:24:36 -0700 Subject: selftests: net: reuseport_dualstack: fix uninitalized parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This test reports EINVAL for getsockopt(SOL_SOCKET, SO_DOMAIN) occasionally due to the uninitialized length parameter. Initialize it to fix this, and also use int for "test_family" to comply with the API standard. Fixes: d6a61f80b871 ("soreuseport: test mixed v4/v6 sockets") Reported-by: Maciej Żenczykowski Signed-off-by: Eric Dumazet Signed-off-by: Wei Wang Cc: Craig Gallek Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/reuseport_dualstack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/reuseport_dualstack.c b/tools/testing/selftests/net/reuseport_dualstack.c index fe3230c55986..fb7a59ed759e 100644 --- a/tools/testing/selftests/net/reuseport_dualstack.c +++ b/tools/testing/selftests/net/reuseport_dualstack.c @@ -129,7 +129,7 @@ static void test(int *rcv_fds, int count, int proto) { struct epoll_event ev; int epfd, i, test_fd; - uint16_t test_family; + int test_family; socklen_t len; epfd = epoll_create(1); @@ -146,6 +146,7 @@ static void test(int *rcv_fds, int count, int proto) send_from_v4(proto); test_fd = receive_once(epfd, proto); + len = sizeof(test_family); if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len)) error(1, errno, "failed to read socket domain"); if (test_family != AF_INET) -- cgit v1.2.3 From 8101e069418d136b995b3da81f1af72637082fda Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 31 Oct 2019 20:06:58 -0700 Subject: selftests: bpf: Skip write only files in debugfs DebugFS for netdevsim now contains some "action trigger" files which are write only. Don't try to capture the contents of those. Note that we can't use os.access() because the script requires root. Fixes: 4418f862d675 ("netdevsim: implement support for devlink region and snapshots") Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- tools/testing/selftests/bpf/test_offload.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 15a666329a34..1afa22c88e42 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -22,6 +22,7 @@ import os import pprint import random import re +import stat import string import struct import subprocess @@ -311,7 +312,11 @@ class DebugfsDir: for f in out.split(): if f == "ports": continue + p = os.path.join(path, f) + if not os.stat(p).st_mode & stat.S_IRUSR: + continue + if os.path.isfile(p): _, out = cmd('cat %s/%s' % (path, f)) dfs[f] = out.strip() -- cgit v1.2.3 From 41aa29a58b5f7f7be43f35372ef411f304a87a0d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 31 Oct 2019 20:06:59 -0700 Subject: net: cls_bpf: fix NULL deref on offload filter removal Commit 401192113730 ("net: sched: refactor block offloads counter usage") missed the fact that either new prog or old prog may be NULL. Fixes: 401192113730 ("net: sched: refactor block offloads counter usage") Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- net/sched/cls_bpf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index bf10bdaf5012..8229ed4a67be 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -162,16 +162,20 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, cls_bpf.name = obj->bpf_name; cls_bpf.exts_integrated = obj->exts_integrated; - if (oldprog) + if (oldprog && prog) err = tc_setup_cb_replace(block, tp, TC_SETUP_CLSBPF, &cls_bpf, skip_sw, &oldprog->gen_flags, &oldprog->in_hw_count, &prog->gen_flags, &prog->in_hw_count, true); - else + else if (prog) err = tc_setup_cb_add(block, tp, TC_SETUP_CLSBPF, &cls_bpf, skip_sw, &prog->gen_flags, &prog->in_hw_count, true); + else + err = tc_setup_cb_destroy(block, tp, TC_SETUP_CLSBPF, &cls_bpf, + skip_sw, &oldprog->gen_flags, + &oldprog->in_hw_count, true); if (prog && err) { cls_bpf_offload_cmd(tp, oldprog, prog, extack); -- cgit v1.2.3 From aefc3e723a78c2e429a64dadd7815ef2a4aecd44 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 31 Oct 2019 20:07:00 -0700 Subject: net: fix installing orphaned programs When netdevice with offloaded BPF programs is destroyed the programs are orphaned and removed from the program IDA - their IDs get released (the programs may remain accessible via existing open file descriptors and pinned files). After IDs are released they are set to 0. This confuses dev_change_xdp_fd() because it compares the __dev_xdp_query() result where 0 means no program with prog->aux->id where 0 means orphaned. dev_change_xdp_fd() would have incorrectly returned success even though it had not installed the program. Since drivers already catch this case via bpf_offload_dev_match() let them handle this case. The error message drivers produce in this case ("program loaded for a different device") is in fact correct as the orphaned program must have been loaded for a different device.
Fixes: c14a9f633d9e ("net: Don't call XDP_SETUP_PROG when nothing is changed") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 96afd464284a..99ac84ff398f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8421,7 +8421,8 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, return -EINVAL; } - if (prog->aux->id == prog_id) { + /* prog->aux->id may be 0 for orphaned device-bound progs */ + if (prog->aux->id && prog->aux->id == prog_id) { bpf_prog_put(prog); return 0; } -- cgit v1.2.3 From 7de086909365cd60a5619a45af3f4152516fd75c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 31 Oct 2019 20:34:44 -0700 Subject: powerpc/bpf: Fix tail call implementation We have seen many crashes on powerpc hosts while loading bpf programs. The problem here is that bpf_int_jit_compile() does a first pass to compute the program length. Then it allocates memory to store the generated program and calls bpf_jit_build_body() a second time (and a third time later). What I have observed is that the second bpf_jit_build_body() could end up using a few more words than expected. If bpf_jit_binary_alloc() put the space for the program at the end of the allocated page, we then write to unmapped memory. It appears that bpf_jit_emit_tail_call() calls bpf_jit_emit_common_epilogue() while ctx->seen might not be stable. Only after the second pass we can be sure ctx->seen won't be changed. Trying to avoid a second pass seems quite complex and probably not worth it. Fixes: ce0761419faef ("powerpc/bpf: Implement support for tail calls") Signed-off-by: Eric Dumazet Signed-off-by: Daniel Borkmann Cc: Naveen N.
Rao Cc: Sandipan Das Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin KaFai Lau Cc: Song Liu Cc: Yonghong Song Link: https://lore.kernel.org/bpf/20191101033444.143741-1-edumazet@google.com --- arch/powerpc/net/bpf_jit_comp64.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 02a59946a78a..be3517ef0574 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -1141,6 +1141,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) goto out_addrs; } + /* + * If we have seen a tail call, we need a second pass. + * This is because bpf_jit_emit_common_epilogue() is called + * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. + */ + if (cgctx.seen & SEEN_TAILCALL) { + cgctx.idx = 0; + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + fp = org_fp; + goto out_addrs; + } + } + /* * Pretend to build prologue, given the features we've seen. This will * update ctgtx.idx as it pretends to output instructions, then we can -- cgit v1.2.3 From 5a74ac4c4a97bd8b7dba054304d598e2a882fea6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 1 Nov 2019 21:36:39 -0400 Subject: idr: Fix idr_get_next_ul race with idr_remove Commit 5c089fd0c734 ("idr: Fix idr_get_next race with idr_remove") neglected to fix idr_get_next_ul(). As far as I can tell, nobody's actually using this interface under the RCU read lock, but fix it now before anybody decides to use it. 
Fixes: 5c089fd0c734 ("idr: Fix idr_get_next race with idr_remove") Signed-off-by: Matthew Wilcox (Oracle) --- lib/idr.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/lib/idr.c b/lib/idr.c index 66a374892482..c2cf2c52bbde 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -215,7 +215,7 @@ int idr_for_each(const struct idr *idr, EXPORT_SYMBOL(idr_for_each); /** - * idr_get_next() - Find next populated entry. + * idr_get_next_ul() - Find next populated entry. * @idr: IDR handle. * @nextid: Pointer to an ID. * @@ -224,7 +224,7 @@ EXPORT_SYMBOL(idr_for_each); * to the ID of the found value. To use in a loop, the value pointed to by * nextid must be incremented by the user. */ -void *idr_get_next(struct idr *idr, int *nextid) +void *idr_get_next_ul(struct idr *idr, unsigned long *nextid) { struct radix_tree_iter iter; void __rcu **slot; @@ -245,18 +245,14 @@ void *idr_get_next(struct idr *idr, int *nextid) } if (!slot) return NULL; - id = iter.index + base; - - if (WARN_ON_ONCE(id > INT_MAX)) - return NULL; - *nextid = id; + *nextid = iter.index + base; return entry; } -EXPORT_SYMBOL(idr_get_next); +EXPORT_SYMBOL(idr_get_next_ul); /** - * idr_get_next_ul() - Find next populated entry. + * idr_get_next() - Find next populated entry. * @idr: IDR handle. * @nextid: Pointer to an ID. * @@ -265,22 +261,17 @@ EXPORT_SYMBOL(idr_get_next); * to the ID of the found value. To use in a loop, the value pointed to by * nextid must be incremented by the user. */ -void *idr_get_next_ul(struct idr *idr, unsigned long *nextid) +void *idr_get_next(struct idr *idr, int *nextid) { - struct radix_tree_iter iter; - void __rcu **slot; - unsigned long base = idr->idr_base; unsigned long id = *nextid; + void *entry = idr_get_next_ul(idr, &id); - id = (id < base) ? 
0 : id - base; - slot = radix_tree_iter_find(&idr->idr_rt, &iter, id); - if (!slot) + if (WARN_ON_ONCE(id > INT_MAX)) return NULL; - - *nextid = iter.index + base; - return rcu_dereference_raw(*slot); + *nextid = id; + return entry; } -EXPORT_SYMBOL(idr_get_next_ul); +EXPORT_SYMBOL(idr_get_next); /** * idr_replace() - replace pointer for given ID. -- cgit v1.2.3 From 797060ec427c83ce4a64a0278a1e6077dfed683a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 1 Nov 2019 22:21:54 -0400 Subject: radix tree: Remove radix_tree_iter_find This API is unsafe to use under the RCU lock. With no in-tree users remaining, remove it to prevent future bugs. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/radix-tree.h | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index b5116013f27e..63e62372443a 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -315,24 +315,6 @@ radix_tree_iter_lookup(const struct radix_tree_root *root, return radix_tree_next_chunk(root, iter, RADIX_TREE_ITER_CONTIG); } -/** - * radix_tree_iter_find - find a present entry - * @root: radix tree root - * @iter: iterator state - * @index: start location - * - * This function returns the slot containing the entry with the lowest index - * which is at least @index. If @index is larger than any present entry, this - * function returns NULL. The @iter is updated to describe the entry found. 
- */ -static inline void __rcu ** -radix_tree_iter_find(const struct radix_tree_root *root, - struct radix_tree_iter *iter, unsigned long index) -{ - radix_tree_iter_init(iter, index); - return radix_tree_next_chunk(root, iter, 0); -} - /** * radix_tree_iter_retry - retry this chunk of the iteration * @iter: iterator state -- cgit v1.2.3 From 43b7029f475e7497da1de1f4a1742241812bf266 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 20 Oct 2019 23:47:18 +0200 Subject: HID: i2c-hid: Send power-on command after reset Before commit 67b18dfb8cfc ("HID: i2c-hid: Remove runtime power management"), any i2c-hid touchscreens would typically be runtime-suspended between the driver loading and Xorg or a Wayland compositor opening it, causing it to be resumed again. This means that before this change, we would call i2c_hid_set_power(OFF), i2c_hid_set_power(ON) before the graphical session would start listening to the touchscreen. It turns out that at least some SIS touchscreens, such as the one found on the Asus T100HA, need a power-on command after reset, otherwise they will not send any events. 
Fixes: 67b18dfb8cfc ("HID: i2c-hid: Remove runtime power management") Cc: Kai-Heng Feng Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index d9c55e30f986..04c088131e04 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -447,8 +447,12 @@ static int i2c_hid_hwreset(struct i2c_client *client) if (ret) { dev_err(&client->dev, "failed to reset device.\n"); i2c_hid_set_power(client, I2C_HID_PWR_SLEEP); + goto out_unlock; } + /* At least some SIS devices need this after reset */ + ret = i2c_hid_set_power(client, I2C_HID_PWR_ON); + out_unlock: mutex_unlock(&ihid->reset_lock); return ret; -- cgit v1.2.3 From f6341c5af4e6e15041be39976d16deca789555fa Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 3 Nov 2019 06:36:43 -0500 Subject: idr: Fix integer overflow in idr_for_each_entry If there is an entry at INT_MAX then idr_for_each_entry() will increment id after handling it. This is undefined behaviour, and is caught by UBSAN. Adding 1U to id forces the operation to be carried out as an unsigned addition which (when assigned to id) will result in INT_MIN. Since there is never an entry stored at INT_MIN, idr_get_next() will return NULL, ending the loop as expected. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/idr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/idr.h b/include/linux/idr.h index ee7abae143d3..dc09bd646bcb 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -185,7 +185,7 @@ static inline void idr_preload_end(void) * is convenient for a "not found" value. 
*/ #define idr_for_each_entry(idr, entry, id) \ - for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; ++id) + for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; id += 1U) /** * idr_for_each_entry_ul() - Iterate over an IDR's elements of a given type. -- cgit v1.2.3 From b7e9728f3d7fc5c5c8508d99f1675212af5cfd49 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 2 Nov 2019 00:25:08 -0400 Subject: idr: Fix idr_alloc_u32 on 32-bit systems Attempting to allocate an entry at 0xffffffff when one is already present would succeed in allocating one at 2^32, which would confuse everything. Return -ENOSPC in this case, as expected. Signed-off-by: Matthew Wilcox (Oracle) --- lib/radix-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 18c1dfbb1765..c8fa1d274530 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1529,7 +1529,7 @@ void __rcu **idr_get_free(struct radix_tree_root *root, offset = radix_tree_find_next_bit(node, IDR_FREE, offset + 1); start = next_index(start, node, offset); - if (start > max) + if (start > max || start == 0) return ERR_PTR(-ENOSPC); while (offset == RADIX_TREE_MAP_SIZE) { offset = node->offset + 1; -- cgit v1.2.3 From 26467b0f8407cbd628fa5b7bcfd156e772004155 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Tue, 29 Oct 2019 13:25:02 +0800 Subject: x86/resctrl: Prevent NULL pointer dereference when reading mondata When a mon group is being deleted, rdtgrp->flags is set to RDT_DELETED in rdtgroup_rmdir_mon() firstly. The structure of rdtgrp will be freed until rdtgrp->waitcount is dropped to 0 in rdtgroup_kn_unlock() later. During the window of deleting a mon group, if an application calls rdtgroup_mondata_show() to read mondata under this mon group, 'rdtgrp' returned from rdtgroup_kn_lock_live() is a NULL pointer when rdtgrp->flags is RDT_DELETED. 
And then 'rdtgrp' is passed in this path: rdtgroup_mondata_show() --> mon_event_read() --> mon_event_count(). Thus it results in NULL pointer dereference in mon_event_count(). Check 'rdtgrp' in rdtgroup_mondata_show(), and return -ENOENT immediately when reading mondata during the window of deleting a mon group. Fixes: d89b7379015f ("x86/intel_rdt/cqm: Add mon_data") Signed-off-by: Xiaochen Shen Signed-off-by: Borislav Petkov Reviewed-by: Fenghua Yu Reviewed-by: Tony Luck Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: pei.p.jia@intel.com Cc: Reinette Chatre Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/1572326702-27577-1-git-send-email-xiaochen.shen@intel.com --- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index efbd54cc4e69..055c8613b531 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -522,6 +522,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) int ret = 0; rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + ret = -ENOENT; + goto out; + } md.priv = of->kn->priv; resid = md.u.rid; -- cgit v1.2.3 From a99d8080aaf358d5d23581244e5da23b35e340b9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Nov 2019 14:07:26 -0800 Subject: Linux 5.4-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 79be70bf2899..b37d0e8fc61d 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- cgit v1.2.3 From 806766af3909258ccab74265e33ce8afd21af952 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 3 Nov 2019 23:38:39 +0100 Subject: Revert "gpio: merrifield: Move hardware initialization to callback" This reverts commit 4c87540940cbc7ddbe9674087919c605fd5c2ef1. 
This revert is a prerequisite for the later revert of commit 8f86a5b4ad679e4836733b47414226074eee4e4d. Reported-by: Hans de Goede Acked-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/gpio/gpio-merrifield.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c index 2f1e9da81c1e..9596024c9161 100644 --- a/drivers/gpio/gpio-merrifield.c +++ b/drivers/gpio/gpio-merrifield.c @@ -362,9 +362,8 @@ static void mrfld_irq_handler(struct irq_desc *desc) chained_irq_exit(irqchip, desc); } -static int mrfld_irq_init_hw(struct gpio_chip *chip) +static void mrfld_irq_init_hw(struct mrfld_gpio *priv) { - struct mrfld_gpio *priv = gpiochip_get_data(chip); void __iomem *reg; unsigned int base; @@ -376,8 +375,6 @@ static int mrfld_irq_init_hw(struct gpio_chip *chip) reg = gpio_reg(&priv->chip, base, GFER); writel(0, reg); } - - return 0; } static const char *mrfld_gpio_get_pinctrl_dev_name(struct mrfld_gpio *priv) @@ -450,7 +447,6 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id girq = &priv->chip.irq; girq->chip = &mrfld_irqchip; - girq->init_hw = mrfld_irq_init_hw; girq->parent_handler = mrfld_irq_handler; girq->num_parents = 1; girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents, @@ -463,6 +459,8 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; + mrfld_irq_init_hw(priv); + pci_set_drvdata(pdev, priv); retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv); if (retval) { -- cgit v1.2.3 From 52c75f56703e1c40a2bc3f64a140602112dfa302 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 3 Nov 2019 23:40:48 +0100 Subject: Revert "gpio: merrifield: Restore use of irq_base" This reverts commit 6658f87f219427ee776c498e07c878eb5cad1be2. This revert is a prerequisite for the later revert of commit 8f86a5b4ad679e4836733b47414226074eee4e4d. 
Reported-by: Hans de Goede Acked-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/gpio/gpio-merrifield.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c index 9596024c9161..4f27ddfe1e2f 100644 --- a/drivers/gpio/gpio-merrifield.c +++ b/drivers/gpio/gpio-merrifield.c @@ -455,7 +455,6 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id if (!girq->parents) return -ENOMEM; girq->parents[0] = pdev->irq; - girq->first = irq_base; girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; -- cgit v1.2.3 From 1173c3c28abfc3d7b7665db502280ba9322320e6 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 3 Nov 2019 23:41:11 +0100 Subject: Revert "gpio: merrifield: Pass irqchip when adding gpiochip" This reverts commit 8f86a5b4ad679e4836733b47414226074eee4e4d. It has been established that this causes a boot regression on both Baytrail and Cherrytrail SoCs, and we can't have that in the final kernel release, so we need to revert it. 
Reported-by: Hans de Goede Acked-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/gpio/gpio-merrifield.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c index 4f27ddfe1e2f..3302125e5265 100644 --- a/drivers/gpio/gpio-merrifield.c +++ b/drivers/gpio/gpio-merrifield.c @@ -397,7 +397,6 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id { const struct mrfld_gpio_pinrange *range; const char *pinctrl_dev_name; - struct gpio_irq_chip *girq; struct mrfld_gpio *priv; u32 gpio_base, irq_base; void __iomem *base; @@ -445,21 +444,6 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id raw_spin_lock_init(&priv->lock); - girq = &priv->chip.irq; - girq->chip = &mrfld_irqchip; - girq->parent_handler = mrfld_irq_handler; - girq->num_parents = 1; - girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents, - sizeof(*girq->parents), - GFP_KERNEL); - if (!girq->parents) - return -ENOMEM; - girq->parents[0] = pdev->irq; - girq->default_type = IRQ_TYPE_NONE; - girq->handler = handle_bad_irq; - - mrfld_irq_init_hw(priv); - pci_set_drvdata(pdev, priv); retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv); if (retval) { @@ -481,6 +465,18 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id } } + retval = gpiochip_irqchip_add(&priv->chip, &mrfld_irqchip, irq_base, + handle_bad_irq, IRQ_TYPE_NONE); + if (retval) { + dev_err(&pdev->dev, "could not connect irqchip to gpiochip\n"); + return retval; + } + + mrfld_irq_init_hw(priv); + + gpiochip_set_chained_irqchip(&priv->chip, &mrfld_irqchip, pdev->irq, + mrfld_irq_handler); + return 0; } -- cgit v1.2.3 From f852497c9a07ec9913bb3f3db5f096a8e2ab7e03 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 30 Oct 2019 17:46:23 +0100 Subject: arm64: dts: zii-ultra: fix ARM regulator GPIO handle The GPIO handle is referencing 
the wrong GPIO, so the voltage did not actually change as intended. The pinmux is already correct, so just correct the GPIO number. Fixes: 4a13b3bec3b4 (arm64: dts: imx: add Zii Ultra board support) Signed-off-by: Lucas Stach Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi index 087b5b6ebe89..32ce14936b01 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi @@ -88,7 +88,7 @@ regulator-name = "0V9_ARM"; regulator-min-microvolt = <900000>; regulator-max-microvolt = <1000000>; - gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>; + gpios = <&gpio3 16 GPIO_ACTIVE_HIGH>; states = <1000000 0x1 900000 0x0>; regulator-always-on; -- cgit v1.2.3 From 706ad6746a66546daf96d4e4a95e46faf6cf689a Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 3 Nov 2019 00:09:20 +0900 Subject: ALSA: bebob: fix to detect configured source of sampling clock for Focusrite Saffire Pro i/o series For Focusrite Saffire Pro i/o, the lowest 8 bits of register represents configured source of sampling clock. The next lowest 8 bits represents whether the configured source is actually detected or not just after the register is changed for the source. Current implementation evaluates whole the register to detect configured source. This results in failure due to the next lowest 8 bits when the source is connected in advance. This commit fixes the bug. 
Fixes: 25784ec2d034 ("ALSA: bebob: Add support for Focusrite Saffire/SaffirePro series") Cc: # v3.16+ Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20191102150920.20367-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/bebob/bebob_focusrite.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/firewire/bebob/bebob_focusrite.c b/sound/firewire/bebob/bebob_focusrite.c index 32b864bee25f..06d6a37cd853 100644 --- a/sound/firewire/bebob/bebob_focusrite.c +++ b/sound/firewire/bebob/bebob_focusrite.c @@ -27,6 +27,8 @@ #define SAFFIRE_CLOCK_SOURCE_SPDIF 1 /* clock sources as returned from register of Saffire Pro 10 and 26 */ +#define SAFFIREPRO_CLOCK_SOURCE_SELECT_MASK 0x000000ff +#define SAFFIREPRO_CLOCK_SOURCE_DETECT_MASK 0x0000ff00 #define SAFFIREPRO_CLOCK_SOURCE_INTERNAL 0 #define SAFFIREPRO_CLOCK_SOURCE_SKIP 1 /* never used on hardware */ #define SAFFIREPRO_CLOCK_SOURCE_SPDIF 2 @@ -189,6 +191,7 @@ saffirepro_both_clk_src_get(struct snd_bebob *bebob, unsigned int *id) map = saffirepro_clk_maps[1]; /* In a case that this driver cannot handle the value of register. */ + value &= SAFFIREPRO_CLOCK_SOURCE_SELECT_MASK; if (value >= SAFFIREPRO_CLOCK_SOURCE_COUNT || map[value] < 0) { err = -EIO; goto end; -- cgit v1.2.3 From b330f3972f4f2a829d41fb9e9b552bec7d73a840 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 27 Sep 2019 11:47:08 +0200 Subject: fbdev: c2p: Fix link failure on non-inlining When the compiler decides not to inline the Chunky-to-Planar core functions, the build fails with: c2p_planar.c:(.text+0xd6): undefined reference to `c2p_unsupported' c2p_planar.c:(.text+0x1dc): undefined reference to `c2p_unsupported' c2p_iplan2.c:(.text+0xc4): undefined reference to `c2p_unsupported' c2p_iplan2.c:(.text+0x150): undefined reference to `c2p_unsupported' Fix this by marking the functions __always_inline. 
While this could be triggered before by manually enabling both CONFIG_OPTIMIZE_INLINING and CONFIG_CC_OPTIMIZE_FOR_SIZE, it was exposed in the m68k defconfig by commit ac7c3e4ff401b304 ("compiler: enable CONFIG_OPTIMIZE_INLINING forcibly"). Fixes: 9012d011660ea5cf ("compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING") Reported-by: noreply@ellerman.id.au Signed-off-by: Geert Uytterhoeven Reviewed-by: Masahiro Yamada Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20190927094708.11563-1-geert@linux-m68k.org --- drivers/video/fbdev/c2p_core.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/c2p_core.h b/drivers/video/fbdev/c2p_core.h index e1035a865fb9..45a6d895a7d7 100644 --- a/drivers/video/fbdev/c2p_core.h +++ b/drivers/video/fbdev/c2p_core.h @@ -29,7 +29,7 @@ static inline void _transp(u32 d[], unsigned int i1, unsigned int i2, extern void c2p_unsupported(void); -static inline u32 get_mask(unsigned int n) +static __always_inline u32 get_mask(unsigned int n) { switch (n) { case 1: @@ -57,7 +57,7 @@ static inline u32 get_mask(unsigned int n) * Transpose operations on 8 32-bit words */ -static inline void transp8(u32 d[], unsigned int n, unsigned int m) +static __always_inline void transp8(u32 d[], unsigned int n, unsigned int m) { u32 mask = get_mask(n); @@ -99,7 +99,7 @@ static inline void transp8(u32 d[], unsigned int n, unsigned int m) * Transpose operations on 4 32-bit words */ -static inline void transp4(u32 d[], unsigned int n, unsigned int m) +static __always_inline void transp4(u32 d[], unsigned int n, unsigned int m) { u32 mask = get_mask(n); @@ -126,7 +126,7 @@ static inline void transp4(u32 d[], unsigned int n, unsigned int m) * Transpose operations on 4 32-bit words (reverse order) */ -static inline void transp4x(u32 d[], unsigned int n, unsigned int m) +static __always_inline void transp4x(u32 d[], unsigned int n, unsigned int m) { u32 mask = get_mask(n); -- cgit 
v1.2.3 From 5dc7d5bc9627eb26d33c7c7eefc467cf217f9326 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 1 Nov 2019 12:06:35 -0500 Subject: ASoC: hdac_hda: fix race in device removal When ASoC card instance is removed containing a HDA codec, hdac_hda_codec_remove() may run in parallel with codec resume. This will cause problems if the HDA link is freed with snd_hdac_ext_bus_link_put() while the codec is still in middle of its resume process. To fix this, change the order such that pm_runtime_disable() is called before the link is freed. This will ensure any pending runtime PM action is completed before proceeding to free the link. This issue can be easily hit with e.g. SOF driver by loading and unloading the drivers. Signed-off-by: Kai Vehmanen Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191101170635.26389-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/hdac_hda.c b/sound/soc/codecs/hdac_hda.c index 91242b6f8ea7..4570f662fb48 100644 --- a/sound/soc/codecs/hdac_hda.c +++ b/sound/soc/codecs/hdac_hda.c @@ -410,8 +410,8 @@ static void hdac_hda_codec_remove(struct snd_soc_component *component) return; } - snd_hdac_ext_bus_link_put(hdev->bus, hlink); pm_runtime_disable(&hdev->dev); + snd_hdac_ext_bus_link_put(hdev->bus, hlink); } static const struct snd_soc_dapm_route hdac_hda_dapm_routes[] = { -- cgit v1.2.3 From 87c0b9c79ec136ea76a14a88d675a746bc6a87f9 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 28 Oct 2019 09:06:45 +0200 Subject: intel_th: gth: Fix the window switching sequence Commit 8116db57cf16 ("intel_th: Add switch triggering support") added a trigger assertion of the CTS, but forgot to de-assert it at the end of the sequence. This results in window switches randomly not happening. Fix that by de-asserting the trigger at the end of the window switch sequence. 
Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Fixes: 8116db57cf16 ("intel_th: Add switch triggering support") Cc: stable Link: https://lore.kernel.org/r/20191028070651.9770-2-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/gth.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hwtracing/intel_th/gth.c b/drivers/hwtracing/intel_th/gth.c index fa9d34af87ac..f72803a02391 100644 --- a/drivers/hwtracing/intel_th/gth.c +++ b/drivers/hwtracing/intel_th/gth.c @@ -626,6 +626,9 @@ static void intel_th_gth_switch(struct intel_th_device *thdev, if (!count) dev_dbg(&thdev->dev, "timeout waiting for CTS Trigger\n"); + /* De-assert the trigger */ + iowrite32(0, gth->base + REG_CTS_CTL); + intel_th_gth_stop(gth, output, false); intel_th_gth_start(gth, output); } -- cgit v1.2.3 From e5a340f770278f4de42e8bac19f2ebeb77ddfae4 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 28 Oct 2019 09:06:46 +0200 Subject: intel_th: msu: Fix an uninitialized mutex Commit 615c164da0eb ("intel_th: msu: Introduce buffer interface") added a mutex that it forgot to initialize, resulting in a lockdep splat. Fix that by initializing the mutex statically. 
Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Fixes: 615c164da0eb ("intel_th: msu: Introduce buffer interface") Link: https://lore.kernel.org/r/20191028070651.9770-3-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/msu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index fc9f15f36ad4..51021020fa3f 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -164,7 +164,7 @@ struct msc { }; static LIST_HEAD(msu_buffer_list); -static struct mutex msu_buffer_mutex; +static DEFINE_MUTEX(msu_buffer_mutex); /** * struct msu_buffer_entry - internal MSU buffer bookkeeping -- cgit v1.2.3 From 063f097fd65a90fca2cd49411a2d6e35b8ca25db Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 28 Oct 2019 09:06:47 +0200 Subject: intel_th: msu: Fix missing allocation failure check on a kstrndup Commit 615c164da0eb ("intel_th: msu: Introduce buffer interface") forgot to add a NULL pointer check for the value returned from kstrdup(), which will be troublesome if the allocation fails. Fix that by adding the check. 
Addresses-Coverity: ("Dereference null return") Fixes: 615c164da0eb ("intel_th: msu: Introduce buffer interface") Signed-off-by: Colin Ian King [alexander.shishkin: amended the commit message] Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/lkml/20190726120421.9650-1-colin.king@canonical.com/ Link: https://lore.kernel.org/r/20191028070651.9770-4-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/msu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 51021020fa3f..201a166fdff5 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -1848,6 +1848,9 @@ mode_store(struct device *dev, struct device_attribute *attr, const char *buf, len = cp - buf; mode = kstrndup(buf, len, GFP_KERNEL); + if (!mode) + return -ENOMEM; + i = match_string(msc_mode, ARRAY_SIZE(msc_mode), mode); if (i >= 0) goto found; -- cgit v1.2.3 From 8e3ef7b444aec3d1059085ce41edaa76ee7340e7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 28 Oct 2019 09:06:48 +0200 Subject: intel_th: msu: Fix overflow in shift of an unsigned int The shift of the unsigned int win->nr_blocks by PAGE_SHIFT may potentially overflow. Note that the intended return of this shift is expected to be a size_t however the shift is being performed as an unsigned int. Fix this by casting win->nr_blocks to a size_t before performing the shift. 
Addresses-Coverity: ("Unintentional integer overflow") Fixes: 615c164da0eb ("intel_th: msu: Introduce buffer interface") Signed-off-by: Colin Ian King Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/lkml/20190726113151.8967-1-colin.king@canonical.com/ Link: https://lore.kernel.org/r/20191028070651.9770-5-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/msu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 201a166fdff5..9dc9ae87b5e5 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -327,7 +327,7 @@ static size_t msc_win_total_sz(struct msc_window *win) struct msc_block_desc *bdesc = sg_virt(sg); if (msc_block_wrapped(bdesc)) - return win->nr_blocks << PAGE_SHIFT; + return (size_t)win->nr_blocks << PAGE_SHIFT; size += msc_total_sz(bdesc); if (msc_block_last_written(bdesc)) -- cgit v1.2.3 From 1fa1b6ca0fda97cbfccdc6b80b1a6b2920751665 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 28 Oct 2019 09:06:49 +0200 Subject: intel_th: msu: Fix possible memory leak in mode_store() 'mode' is malloced in mode_store() and should be freed before leaving from the error handling cases, otherwise it will cause memory leak. 
Fixes: 615c164da0eb ("intel_th: msu: Introduce buffer interface") Signed-off-by: Wei Yongjun Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/lkml/20190801013825.182543-1-weiyongjun1@huawei.com/ Link: https://lore.kernel.org/r/20191028070651.9770-6-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/msu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 9dc9ae87b5e5..6d240dfae9d9 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -1852,8 +1852,10 @@ mode_store(struct device *dev, struct device_attribute *attr, const char *buf, return -ENOMEM; i = match_string(msc_mode, ARRAY_SIZE(msc_mode), mode); - if (i >= 0) + if (i >= 0) { + kfree(mode); goto found; + } /* Buffer sinks only work with a usable IRQ */ if (!msc->do_irq) { -- cgit v1.2.3 From 3adbb5718dd5264666ddbc2b9b43799d292e9cb6 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 28 Oct 2019 09:06:50 +0200 Subject: intel_th: pci: Add Comet Lake PCH support This adds support for Intel TH on Comet Lake PCH. 
Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191028070651.9770-7-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 91dfeba62485..4088e1865b07 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -199,6 +199,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x02a6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Comet Lake PCH */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x06a6), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Ice Lake NNPI */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x45c5), -- cgit v1.2.3 From 9d55499d8da49e9261e95a490f3fda41d955f505 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 28 Oct 2019 09:06:51 +0200 Subject: intel_th: pci: Add Jasper Lake PCH support This adds support for Intel TH on Jasper Lake PCH. 
Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191028070651.9770-8-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 4088e1865b07..03ca5b1bef9f 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -214,6 +214,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa0a6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Jasper Lake PCH */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4da6), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { 0 }, }; -- cgit v1.2.3 From de2a60522343a6cab998f61fd906eae445b19963 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Oct 2019 15:07:06 +0100 Subject: netfilter: nf_tables_offload: check for register data length mismatches Make sure register data length does not mismatch immediate data length, otherwise hit EOPNOTSUPP. 
Fixes: c9626a2cbdb2 ("netfilter: nf_tables: add hardware offload support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_bitwise.c | 5 +++-- net/netfilter/nft_cmp.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 974300178fa9..02afa752dd2e 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -134,12 +134,13 @@ static int nft_bitwise_offload(struct nft_offload_ctx *ctx, const struct nft_expr *expr) { const struct nft_bitwise *priv = nft_expr_priv(expr); + struct nft_offload_reg *reg = &ctx->regs[priv->dreg]; if (memcmp(&priv->xor, &zero, sizeof(priv->xor)) || - priv->sreg != priv->dreg) + priv->sreg != priv->dreg || priv->len != reg->len) return -EOPNOTSUPP; - memcpy(&ctx->regs[priv->dreg].mask, &priv->mask, sizeof(priv->mask)); + memcpy(&reg->mask, &priv->mask, sizeof(priv->mask)); return 0; } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index bd173b1824c6..0744b2bb46da 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -116,7 +116,7 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx, u8 *mask = (u8 *)&flow->match.mask; u8 *key = (u8 *)&flow->match.key; - if (priv->op != NFT_CMP_EQ) + if (priv->op != NFT_CMP_EQ || reg->len != priv->len) return -EOPNOTSUPP; memcpy(key + reg->offset, &priv->data, priv->len); -- cgit v1.2.3 From 9982b0f69b49931b652d35f86f519be2ccfc7027 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 2 Oct 2019 11:34:36 +0300 Subject: clk: ti: dra7-atl-clock: Remove ti_clk_add_alias call ti_clk_register() calls it already so the driver should not create duplicated alias.
Signed-off-by: Peter Ujfalusi Link: https://lkml.kernel.org/r/20191002083436.10194-1-peter.ujfalusi@ti.com Signed-off-by: Stephen Boyd --- drivers/clk/ti/clk-dra7-atl.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c index a01ca9395179..f65e16c4f3c4 100644 --- a/drivers/clk/ti/clk-dra7-atl.c +++ b/drivers/clk/ti/clk-dra7-atl.c @@ -174,7 +174,6 @@ static void __init of_dra7_atl_clock_setup(struct device_node *node) struct clk_init_data init = { NULL }; const char **parent_names = NULL; struct clk *clk; - int ret; clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL); if (!clk_hw) { @@ -207,11 +206,6 @@ static void __init of_dra7_atl_clock_setup(struct device_node *node) clk = ti_clk_register(NULL, &clk_hw->hw, node->name); if (!IS_ERR(clk)) { - ret = ti_clk_add_alias(NULL, clk, node->name); - if (ret) { - clk_unregister(clk); - goto cleanup; - } of_clk_add_provider(node, of_clk_src_simple_get, clk); kfree(parent_names); return; -- cgit v1.2.3 From 81a41901ffd46bac6df4c95b8290ac259e0feda8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 30 Sep 2019 08:40:01 -0700 Subject: clk: ti: clkctrl: Fix failed to enable error with double udelay timeout Commit 3d8598fb9c5a ("clk: ti: clkctrl: use fallback udelay approach if timekeeping is suspended") added handling for cases when timekeeping is suspended. But looks like we can still get occasional "failed to enable" errors on the PM runtime resume path with udelay() returning faster than expected. With ti-sysc interconnect target module driver this leads into device failure with PM runtime failing with "failed to enable" clkctrl error. Let's fix the issue with a delay of two times the desired delay as in often done for udelay() to account for the inaccuracy. 
Fixes: 3d8598fb9c5a ("clk: ti: clkctrl: use fallback udelay approach if timekeeping is suspended") Cc: Keerthy Cc: Tero Kristo Signed-off-by: Tony Lindgren Link: https://lkml.kernel.org/r/20190930154001.46581-1-tony@atomide.com Tested-by: Keerthy Signed-off-by: Stephen Boyd --- drivers/clk/ti/clkctrl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/clk/ti/clkctrl.c b/drivers/clk/ti/clkctrl.c index 975995eea15c..b0c0690a5a12 100644 --- a/drivers/clk/ti/clkctrl.c +++ b/drivers/clk/ti/clkctrl.c @@ -100,11 +100,12 @@ static bool _omap4_is_timeout(union omap4_timeout *time, u32 timeout) * can be from a timer that requires pm_runtime access, which * will eventually bring us here with timekeeping_suspended, * during both suspend entry and resume paths. This happens - * at least on am43xx platform. + * at least on am43xx platform. Account for flakeyness + * with udelay() by multiplying the timeout value by 2. */ if (unlikely(_early_timeout || timekeeping_suspended)) { if (time->cycles++ < timeout) { - udelay(1); + udelay(1 * 2); return false; } } else { -- cgit v1.2.3 From c43eab3eddb4c6742ac20138659a9b701822b274 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Mon, 4 Nov 2019 23:50:00 +0800 Subject: net: fec: add missed clk_disable_unprepare in remove This driver forgets to disable and unprepare clks when remove. Add calls to clk_disable_unprepare to fix it. Signed-off-by: Chuhong Yuan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/fec_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 22c01b224baa..a9c386b63581 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3645,6 +3645,8 @@ fec_drv_remove(struct platform_device *pdev) regulator_disable(fep->reg_phy); pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); + clk_disable_unprepare(fep->clk_ahb); + clk_disable_unprepare(fep->clk_ipg); if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(fep->phy_node); -- cgit v1.2.3 From 3d1e5039f5f87a8731202ceca08764ee7cb010d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Nov 2019 07:57:55 -0800 Subject: dccp: do not leak jiffies on the wire For some reason I missed the case of DCCP passive flows in my previous patch. Fixes: a904a0693c18 ("inet: stop leaking jiffies on the wire") Signed-off-by: Eric Dumazet Reported-by: Thiemo Nagel Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 0d8f782c25cc..d19557c6d04b 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -416,7 +416,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; - newinet->inet_id = jiffies; + newinet->inet_id = prandom_u32(); if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) goto put_and_exit; -- cgit v1.2.3 From 30b7244d79651460ff114ba8f7987ed94c86b99a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 24 Aug 2019 17:49:55 +0300 Subject: netfilter: ipset: Fix an error code in ip_set_sockfn_get() The copy_to_user() function returns the number of bytes remaining to be copied. 
In this code, that positive return is checked at the end of the function and we return zero/success. What we should do instead is return -EFAULT. Fixes: a7b4f989a629 ("netfilter: ipset: IP set core support") Signed-off-by: Dan Carpenter Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index e64d5f9a89dd..e7288eab7512 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -2069,8 +2069,9 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } req_version->version = IPSET_PROTOCOL; - ret = copy_to_user(user, req_version, - sizeof(struct ip_set_req_version)); + if (copy_to_user(user, req_version, + sizeof(struct ip_set_req_version))) + ret = -EFAULT; goto done; } case IP_SET_OP_GET_BYNAME: { @@ -2129,7 +2130,8 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } /* end of switch(op) */ copy: - ret = copy_to_user(user, data, copylen); + if (copy_to_user(user, data, copylen)) + ret = -EFAULT; done: vfree(data); -- cgit v1.2.3 From 97664bc2c77e2b65cdedddcae2643fc93291d958 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 10 Oct 2019 19:18:14 +0200 Subject: netfilter: ipset: Copy the right MAC address in hash:ip,mac IPv6 sets Same as commit 1b4a75108d5b ("netfilter: ipset: Copy the right MAC address in bitmap:ip,mac and hash:ip,mac sets"), another copy and paste went wrong in commit 8cc4ccf58379 ("netfilter: ipset: Allow matching on destination MAC address for mac and ipmac sets"). When I fixed this for IPv4 in 1b4a75108d5b, I didn't realise that hash:ip,mac sets also support IPv6 as family, and this is covered by a separate function, hash_ipmac6_kadt(). 
In hash:ip,mac sets, the first dimension is the IP address, and the second dimension is the MAC address: check the IPSET_DIM_TWO_SRC flag in flags while deciding which MAC address to copy, destination or source. This way, mixing source and destination matches for the two dimensions of ip,mac hash type works as expected, also for IPv6. With this setup: ip netns add A ip link add veth1 type veth peer name veth2 netns A ip addr add 2001:db8::1/64 dev veth1 ip -net A addr add 2001:db8::2/64 dev veth2 ip link set veth1 up ip -net A link set veth2 up dst=$(ip netns exec A cat /sys/class/net/veth2/address) ip netns exec A ipset create test_hash hash:ip,mac family inet6 ip netns exec A ipset add test_hash 2001:db8::1,${dst} ip netns exec A ip6tables -A INPUT -p icmpv6 --icmpv6-type 135 -j ACCEPT ip netns exec A ip6tables -A INPUT -m set ! --match-set test_hash src,dst -j DROP ipset now correctly matches a test packet: # ping -c1 2001:db8::2 >/dev/null # echo $? 0 Reported-by: Chen, Yi Fixes: 8cc4ccf58379 ("netfilter: ipset: Allow matching on destination MAC address for mac and ipmac sets") Signed-off-by: Stefano Brivio Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_ipmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c index 24d8f4df4230..4ce563eb927d 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmac.c +++ b/net/netfilter/ipset/ip_set_hash_ipmac.c @@ -209,7 +209,7 @@ hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb, (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; - if (opt->flags & IPSET_DIM_ONE_SRC) + if (opt->flags & IPSET_DIM_TWO_SRC) ether_addr_copy(e.ether, eth_hdr(skb)->h_source); else ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); -- cgit v1.2.3 From 1289975643f4cdecb071dc641059a47679fd170f Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 1 Nov 2019 17:13:18 +0100 Subject: netfilter: ipset: Fix nla_policies 
to fully support NL_VALIDATE_STRICT Since v5.2 (commit "netlink: re-add parse/validate functions in strict mode") NL_VALIDATE_STRICT is enabled. Fix the ipset nla_policies which did not support strict mode and convert from deprecated parsings to verified ones. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_core.c | 41 ++++++++++++++++++++++---------- net/netfilter/ipset/ip_set_hash_net.c | 1 + net/netfilter/ipset/ip_set_hash_netnet.c | 1 + 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index e7288eab7512..d73d1828216a 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -296,7 +296,8 @@ ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse_nested_deprecated(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy, NULL)) + if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, + ipaddr_policy, NULL)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4))) return -IPSET_ERR_PROTOCOL; @@ -314,7 +315,8 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse_nested_deprecated(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy, NULL)) + if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, + ipaddr_policy, NULL)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6))) return -IPSET_ERR_PROTOCOL; @@ -934,7 +936,8 @@ static int ip_set_create(struct net *net, struct sock *ctnl, /* Without holding any locks, create private part. 
*/ if (attr[IPSET_ATTR_DATA] && - nla_parse_nested_deprecated(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], set->type->create_policy, NULL)) { + nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], + set->type->create_policy, NULL)) { ret = -IPSET_ERR_PROTOCOL; goto put_out; } @@ -1281,6 +1284,14 @@ dump_attrs(struct nlmsghdr *nlh) } } +static const struct nla_policy +ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, + [IPSET_ATTR_FLAGS] = { .type = NLA_U32 }, +}; + static int dump_init(struct netlink_callback *cb, struct ip_set_net *inst) { @@ -1292,9 +1303,9 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) ip_set_id_t index; int ret; - ret = nla_parse_deprecated(cda, IPSET_ATTR_CMD_MAX, attr, - nlh->nlmsg_len - min_len, - ip_set_setname_policy, NULL); + ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, + nlh->nlmsg_len - min_len, + ip_set_dump_policy, NULL); if (ret) return ret; @@ -1543,9 +1554,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, memcpy(&errmsg->msg, nlh, nlh->nlmsg_len); cmdattr = (void *)&errmsg->msg + min_len; - ret = nla_parse_deprecated(cda, IPSET_ATTR_CMD_MAX, cmdattr, - nlh->nlmsg_len - min_len, - ip_set_adt_policy, NULL); + ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr, + nlh->nlmsg_len - min_len, ip_set_adt_policy, + NULL); if (ret) { nlmsg_free(skb2); @@ -1596,7 +1607,9 @@ static int ip_set_ad(struct net *net, struct sock *ctnl, use_lineno = !!attr[IPSET_ATTR_LINENO]; if (attr[IPSET_ATTR_DATA]) { - if (nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL)) + if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, + attr[IPSET_ATTR_DATA], + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, adt, flags, use_lineno); @@ -1606,7 +1619,8 @@ static int ip_set_ad(struct net *net, 
struct sock *ctnl, nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) { if (nla_type(nla) != IPSET_ATTR_DATA || !flag_nested(nla) || - nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, nla, set->type->adt_policy, NULL)) + nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, adt, flags, use_lineno); @@ -1655,7 +1669,8 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, if (!set) return -ENOENT; - if (nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL)) + if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; rcu_read_lock_bh(); @@ -1961,7 +1976,7 @@ static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { [IPSET_CMD_LIST] = { .call = ip_set_dump, .attr_count = IPSET_ATTR_CMD_MAX, - .policy = ip_set_setname_policy, + .policy = ip_set_dump_policy, }, [IPSET_CMD_SAVE] = { .call = ip_set_dump, diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index c259cbc3ef45..3d932de0ad29 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -368,6 +368,7 @@ static struct ip_set_type hash_net_type __read_mostly = { [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index a3ae69bfee66..4398322fad59 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -476,6 +476,7 @@ static struct ip_set_type hash_netnet_type __read_mostly = { 
[IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_CIDR2] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, -- cgit v1.2.3 From 250367c59e6ba0d79d702a059712d66edacd4a1a Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 31 Oct 2019 11:06:24 +0100 Subject: netfilter: nf_tables: Align nft_expr private data to 64-bit Invoking the following commands on a 32-bit architecture with strict alignment requirements (such as an ARMv7-based Raspberry Pi) results in an alignment exception: # nft add table ip test-ip4 # nft add chain ip test-ip4 output { type filter hook output priority 0; } # nft add rule ip test-ip4 output quota 1025 bytes Alignment trap: not handling instruction e1b26f9f at [<7f4473f8>] Unhandled fault: alignment exception (0x001) at 0xb832e824 Internal error: : 1 [#1] PREEMPT SMP ARM Hardware name: BCM2835 [<7f4473fc>] (nft_quota_do_init [nft_quota]) [<7f447448>] (nft_quota_init [nft_quota]) [<7f4260d0>] (nf_tables_newrule [nf_tables]) [<7f4168dc>] (nfnetlink_rcv_batch [nfnetlink]) [<7f416bd0>] (nfnetlink_rcv [nfnetlink]) [<8078b334>] (netlink_unicast) [<8078b664>] (netlink_sendmsg) [<8071b47c>] (sock_sendmsg) [<8071bd18>] (___sys_sendmsg) [<8071ce3c>] (__sys_sendmsg) [<8071ce94>] (sys_sendmsg) The reason is that nft_quota_do_init() calls atomic64_set() on an atomic64_t which is only aligned to 32-bit, not 64-bit, because it succeeds struct nft_expr in memory which only contains a 32-bit pointer. Fix by aligning the nft_expr private data to 64-bit. 
Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 001d294edf57..2d0275f13bbf 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -820,7 +820,8 @@ struct nft_expr_ops { */ struct nft_expr { const struct nft_expr_ops *ops; - unsigned char data[]; + unsigned char data[] + __attribute__((aligned(__alignof__(u64)))); }; static inline void *nft_expr_priv(const struct nft_expr *expr) -- cgit v1.2.3 From 9fedd894b4e1c7ad5e5f711899f6a0a1da01d996 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Sat, 2 Nov 2019 21:59:44 +0100 Subject: netfilter: nf_tables: fix unexpected EOPNOTSUPP error If the object type doesn't implement an update operation and the user tries to update it will silently ignore the update operation. 
Fixes: aa4095a156b5 ("netfilter: nf_tables: fix possible null-pointer dereference in object update") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d481f9baca2f..aa26841ad9a1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5143,9 +5143,6 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, struct nft_trans *trans; int err; - if (!obj->ops->update) - return -EOPNOTSUPP; - trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ, sizeof(struct nft_trans_obj)); if (!trans) @@ -6499,7 +6496,8 @@ static void nft_obj_commit_update(struct nft_trans *trans) obj = nft_trans_obj(trans); newobj = nft_trans_obj_newobj(trans); - obj->ops->update(obj, newobj); + if (obj->ops->update) + obj->ops->update(obj, newobj); kfree(newobj); } -- cgit v1.2.3 From b23c0742c2ce7e33ed79d10e451f70fdb5ca85d1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 3 Nov 2019 20:54:28 +0100 Subject: bridge: ebtables: don't crash when using dnat target in output chains MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xt_in() returns NULL in the output hook, skip the pkt_type change for that case, redirection only makes sense in broute/prerouting hooks. 
Reported-by: Tom Yan Cc: Linus Lüssing Fixes: cf3cb246e277d ("bridge: ebtables: fix reception of frames DNAT-ed to bridge device/port") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_dnat.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index ed91ea31978a..12a4f4d93681 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -20,7 +20,6 @@ static unsigned int ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nat_info *info = par->targinfo; - struct net_device *dev; if (skb_ensure_writable(skb, ETH_ALEN)) return EBT_DROP; @@ -33,10 +32,22 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) else skb->pkt_type = PACKET_MULTICAST; } else { - if (xt_hooknum(par) != NF_BR_BROUTING) - dev = br_port_get_rcu(xt_in(par))->br->dev; - else + const struct net_device *dev; + + switch (xt_hooknum(par)) { + case NF_BR_BROUTING: dev = xt_in(par); + break; + case NF_BR_PRE_ROUTING: + dev = br_port_get_rcu(xt_in(par))->br->dev; + break; + default: + dev = NULL; + break; + } + + if (!dev) /* NF_BR_LOCAL_OUT */ + return info->target; if (ether_addr_equal(info->mac, dev->dev_addr)) skb->pkt_type = PACKET_HOST; -- cgit v1.2.3 From 1ed012f6fd83e7ee7efd22e2c32f23efff015b30 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Nov 2019 14:52:41 +0100 Subject: netfilter: nf_tables: bogus EOPNOTSUPP on basechain update Userspace never includes the NFT_BASE_CHAIN flag, this flag is inferred from the NFTA_CHAIN_HOOK attribute. The chain update path does not allow to update flags at this stage, the existing sanity check bogusly hits EOPNOTSUPP in the basechain case if the offload flag is set on.
Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index aa26841ad9a1..712a428509ad 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1922,6 +1922,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + flags |= chain->flags & NFT_BASE_CHAIN; return nf_tables_updchain(&ctx, genmask, policy, flags); } -- cgit v1.2.3 From 88c749840dff58e7a40e18bf9bdace15f27ef259 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Nov 2019 14:52:42 +0100 Subject: netfilter: nf_tables_offload: skip EBUSY on chain update Do not try to bind a chain again if it exists, otherwise the driver returns EBUSY. Fixes: c9626a2cbdb2 ("netfilter: nf_tables: add hardware offload support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index ad783f4840ef..e25dab8128db 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -334,7 +334,8 @@ int nft_flow_rule_offload_commit(struct net *net) switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) || + nft_trans_chain_update(trans)) continue; policy = nft_trans_chain_policy(trans); -- cgit v1.2.3 From b0c51f158455e31d5024100cf3580fcd88214b0e Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 30 Oct 2019 08:25:45 +0100 Subject: stacktrace: Don't skip first entry on noncurrent tasks When doing cat /proc/<PID>/stack, the output is missing the first entry. When the current code walks the stack starting in stack_trace_save_tsk, it skips all scheduler functions (that's OK) plus one more function.
But this one function should be skipped only for the 'current' task as it is stack_trace_save_tsk proper. The original code (before the common infrastructure) skipped one function only for the 'current' task -- see save_stack_trace_tsk before 3599fe12a125. So do so also in the new infrastructure now. Fixes: 214d8ca6ee85 ("stacktrace: Provide common infrastructure") Signed-off-by: Jiri Slaby Signed-off-by: Thomas Gleixner Tested-by: Michal Suchanek Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20191030072545.19462-1-jslaby@suse.cz --- kernel/stacktrace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index 6d1f68b7e528..c9ea7eb2cb1a 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -141,7 +141,8 @@ unsigned int stack_trace_save_tsk(struct task_struct *tsk, unsigned long *store, struct stacktrace_cookie c = { .store = store, .size = size, - .skip = skipnr + 1, + /* skip this function if they are tracing us */ + .skip = skipnr + !!(current == tsk), }; if (!try_get_task_stack(tsk)) @@ -298,7 +299,8 @@ unsigned int stack_trace_save_tsk(struct task_struct *task, struct stack_trace trace = { .entries = store, .max_entries = size, - .skip = skipnr + 1, + /* skip this function if they are tracing us */ + .skip = skipnr + !!(current == task), }; save_stack_trace_tsk(task, &trace); -- cgit v1.2.3 From d98da49977f67394db492f06c00b1fb1cc090c05 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 11 Oct 2019 09:03:54 -0400 Subject: btrfs: save i_size to avoid double evaluation of i_size_read in compress_file_range We hit a regression while rolling out 5.2 internally where we were hitting the following panic kernel BUG at mm/page-writeback.c:2659! RIP: 0010:clear_page_dirty_for_io+0xe6/0x1f0 Call Trace: __process_pages_contig+0x25a/0x350 ? 
extent_clear_unlock_delalloc+0x43/0x70 submit_compressed_extents+0x359/0x4d0 normal_work_helper+0x15a/0x330 process_one_work+0x1f5/0x3f0 worker_thread+0x2d/0x3d0 ? rescuer_thread+0x340/0x340 kthread+0x111/0x130 ? kthread_create_on_node+0x60/0x60 ret_from_fork+0x1f/0x30 This is happening because the page is not locked when doing clear_page_dirty_for_io. Looking at the core dump it was because our async_extent had a ram_size of 24576 but our async_chunk range only spanned 20480, so we had a whole extra page in our ram_size for our async_extent. This happened because we try not to compress pages outside of our i_size, however a cleanup patch changed us to do actual_end = min_t(u64, i_size_read(inode), end + 1); which is problematic because i_size_read() can evaluate to different values in between checking and assigning. So either an expanding truncate or a fallocate could increase our i_size while we're doing writeout and actual_end would end up being past the range we have locked. I confirmed this was what was happening by installing a debug kernel that had actual_end = min_t(u64, i_size_read(inode), end + 1); if (actual_end > end + 1) { printk(KERN_ERR "KABOOM\n"); actual_end = end + 1; } and installing it onto 500 boxes of the tier that had been seeing the problem regularly. Last night I got my debug message and no panic, confirming what I expected. [ dsterba: the assembly confirms a tiny race window: mov 0x20(%rsp),%rax cmp %rax,0x48(%r15) # read movl $0x0,0x18(%rsp) mov %rax,%r12 mov %r14,%rax cmovbe 0x48(%r15),%r12 # eval Where r15 is inode and 0x48 is offset of i_size. The original fix was to revert 62b37622718c that would do an intermediate assignment and this would also avoid the double evaluation but is not future-proof, should the compiler merge the stores and call i_size_read anyway. There's a patch adding READ_ONCE to i_size_read but that's not being applied at the moment and we need to fix the bug.
Instead, emulate READ_ONCE by two barrier()s that's what effectively happens. The assembly confirms single evaluation: mov 0x48(%rbp),%rax # read once mov 0x20(%rsp),%rcx mov $0x20,%edx cmp %rax,%rcx cmovbe %rcx,%rax mov %rax,(%rsp) mov %rax,%rcx mov %r14,%rax Where 0x48(%rbp) is inode->i_size stored to %eax. ] Fixes: 62b37622718c ("btrfs: Remove isize local variable in compress_file_range") CC: stable@vger.kernel.org # v5.1+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ changelog updated ] Signed-off-by: David Sterba --- fs/btrfs/inode.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c3f386b7cc0b..c6dc4dd16cf7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -474,6 +474,7 @@ static noinline int compress_file_range(struct async_chunk *async_chunk) u64 start = async_chunk->start; u64 end = async_chunk->end; u64 actual_end; + u64 i_size; int ret = 0; struct page **pages = NULL; unsigned long nr_pages; @@ -488,7 +489,19 @@ static noinline int compress_file_range(struct async_chunk *async_chunk) inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1, SZ_16K); - actual_end = min_t(u64, i_size_read(inode), end + 1); + /* + * We need to save i_size before now because it could change in between + * us evaluating the size and assigning it. This is because we lock and + * unlock the page in truncate and fallocate, and then modify the i_size + * later on. + * + * The barriers are to emulate READ_ONCE, remove that once i_size_read + * does that for us. 
+ */ barrier(); + i_size = i_size_read(inode); + barrier(); + actual_end = min_t(u64, i_size, end + 1); again: will_compress = 0; nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1; -- cgit v1.2.3 From a5009d3a318e9f02ddc9aa3d55e2c64d6285c4b9 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 4 Nov 2019 21:29:30 +0100 Subject: btrfs: un-deprecate ioctls START_SYNC and WAIT_SYNC The two ioctls START_SYNC and WAIT_SYNC were mistakenly marked as deprecated and scheduled for removal but we actually do use them for 'btrfs subvolume delete -C/-c'. The deprecated thing in ebc87351e5fc should have been just the async flag for subvolume creation. The deprecation has been added in this development cycle, remove it until it's time. Fixes: ebc87351e5fc ("btrfs: Deprecate BTRFS_SUBVOL_CREATE_ASYNC flag") Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7c145a41decd..23272d9154f3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4195,9 +4195,6 @@ static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root, u64 transid; int ret; - btrfs_warn(root->fs_info, - "START_SYNC ioctl is deprecated and will be removed in kernel 5.7"); - trans = btrfs_attach_transaction_barrier(root); if (IS_ERR(trans)) { if (PTR_ERR(trans) != -ENOENT) @@ -4225,9 +4222,6 @@ static noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info, { u64 transid; - btrfs_warn(fs_info, - "WAIT_SYNC ioctl is deprecated and will be removed in kernel 5.7"); - if (argp) { if (copy_from_user(&transid, argp, sizeof(transid))) return -EFAULT; -- cgit v1.2.3 From db9ee384f6f71f7c5296ce85b7c1a2a2527e7c72 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Sat, 28 Sep 2019 22:29:05 +0800 Subject: can: dev: add missing of_node_put() after calling of_get_child_by_name() of_node_put() needs to be called when the device node which is got from of_get_child_by_name() finished using.
Fixes: 2290aefa2e90 ("can: dev: Add support for limiting configured bitrate") Cc: Franklin S Cooper Jr Signed-off-by: Wen Yang Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index ac86be52b461..1c88c361938c 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -848,6 +848,7 @@ void of_can_transceiver(struct net_device *dev) return; ret = of_property_read_u32(dn, "max-bitrate", &priv->bitrate_max); + of_node_put(dn); if ((ret && ret != -EINVAL) || (!ret && !priv->bitrate_max)) netdev_warn(dev, "Invalid value for transceiver max bitrate. Ignoring bitrate limit.\n"); } -- cgit v1.2.3 From fb5be6a7b4863ecc44963bb80ca614584b6c7817 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Thu, 19 Sep 2019 21:44:38 -0500 Subject: can: gs_usb: gs_can_open(): prevent memory leak In gs_can_open() if usb_submit_urb() fails the allocated urb should be released. Fixes: d08e973a77d1 ("can: gs_usb: Added support for the GS_USB CAN devices") Cc: linux-stable Signed-off-by: Navid Emamdoost Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index bd6eb9967630..2f74f6704c12 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -623,6 +623,7 @@ static int gs_can_open(struct net_device *netdev) rc); usb_unanchor_urb(urb); + usb_free_urb(urb); break; } -- cgit v1.2.3 From 4d6636498c41891d0482a914dd570343a838ad79 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Oct 2019 12:29:13 +0200 Subject: can: mcba_usb: fix use-after-free on disconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver was accessing its driver data after having freed it. 
Fixes: 51f3baad7de9 ("can: mcba_usb: Add support for Microchip CAN BUS Analyzer") Cc: stable # 4.12 Cc: Remigiusz Kołłątaj Reported-by: syzbot+e29b17e5042bbc56fae9@syzkaller.appspotmail.com Signed-off-by: Johan Hovold Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/mcba_usb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 19a702ac49e4..21faa2ec4632 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -876,9 +876,8 @@ static void mcba_usb_disconnect(struct usb_interface *intf) netdev_info(priv->netdev, "device disconnected\n"); unregister_candev(priv->netdev); - free_candev(priv->netdev); - mcba_urb_unlink(priv); + free_candev(priv->netdev); } static struct usb_driver mcba_usb_driver = { -- cgit v1.2.3 From 3759739426186a924675651b388d1c3963c5710e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Oct 2019 12:29:14 +0200 Subject: can: usb_8dev: fix use-after-free on disconnect The driver was accessing its driver data after having freed it. 
Fixes: 0024d8ad1639 ("can: usb_8dev: Add support for USB2CAN interface from 8 devices") Cc: stable # 3.9 Cc: Bernd Krumboeck Cc: Wolfgang Grandegger Signed-off-by: Johan Hovold Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/usb_8dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index d596a2ad7f78..8fa224b28218 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -996,9 +996,8 @@ static void usb_8dev_disconnect(struct usb_interface *intf) netdev_info(priv->netdev, "device disconnected\n"); unregister_netdev(priv->netdev); - free_candev(priv->netdev); - unlink_all_urbs(priv); + free_candev(priv->netdev); } } -- cgit v1.2.3 From 5e269324db5adb2f5f6ec9a93a9c7b0672932b47 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Thu, 15 Aug 2019 08:00:26 +0000 Subject: can: flexcan: disable completely the ECC mechanism The ECC (memory error detection and correction) mechanism can be activated or not, controlled by the ECCDIS bit in CAN_MECR. When disabled, updates on indications and reporting registers are stopped. So if want to disable ECC completely, had better assert ECCDIS bit, not just mask the related interrupts. 
Fixes: cdce844865be ("can: flexcan: add vf610 support for FlexCAN") Signed-off-by: Joakim Zhang Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index dc5695dffc2e..1cd5179cb876 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1188,6 +1188,7 @@ static int flexcan_chip_start(struct net_device *dev) reg_mecr = priv->read(&regs->mecr); reg_mecr &= ~FLEXCAN_MECR_ECRWRDIS; priv->write(reg_mecr, &regs->mecr); + reg_mecr |= FLEXCAN_MECR_ECCDIS; reg_mecr &= ~(FLEXCAN_MECR_NCEFAFRZ | FLEXCAN_MECR_HANCEI_MSK | FLEXCAN_MECR_FANCEI_MSK); priv->write(reg_mecr, &regs->mecr); -- cgit v1.2.3 From de280f403f2996679e2607384980703710576fed Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Tue, 8 Oct 2019 10:35:44 +0200 Subject: can: peak_usb: fix a potential out-of-sync while decoding packets When decoding a buffer received from PCAN-USB, the first timestamp read in a packet is a 16-bit coded time base, and the next ones are an 8-bit offset to this base, regardless of the type of packet read. This patch corrects a potential loss of synchronization by using a timestamp index read from the buffer, rather than an index of received data packets, to determine the size of the timestamp to be read from the packet being decoded.
Signed-off-by: Stephane Grosjean Fixes: 46be265d3388 ("can: usb: PEAK-System Technik PCAN-USB specific part") Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 617da295b6c1..5a66c9f53aae 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -100,7 +100,7 @@ struct pcan_usb_msg_context { u8 *end; u8 rec_cnt; u8 rec_idx; - u8 rec_data_idx; + u8 rec_ts_idx; struct net_device *netdev; struct pcan_usb *pdev; }; @@ -547,10 +547,15 @@ static int pcan_usb_decode_status(struct pcan_usb_msg_context *mc, mc->ptr += PCAN_USB_CMD_ARGS; if (status_len & PCAN_USB_STATUSLEN_TIMESTAMP) { - int err = pcan_usb_decode_ts(mc, !mc->rec_idx); + int err = pcan_usb_decode_ts(mc, !mc->rec_ts_idx); if (err) return err; + + /* Next packet in the buffer will have a timestamp on a single + * byte + */ + mc->rec_ts_idx++; } switch (f) { @@ -632,10 +637,13 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len) cf->can_dlc = get_can_dlc(rec_len); - /* first data packet timestamp is a word */ - if (pcan_usb_decode_ts(mc, !mc->rec_data_idx)) + /* Only first packet timestamp is a word */ + if (pcan_usb_decode_ts(mc, !mc->rec_ts_idx)) goto decode_failed; + /* Next packet in the buffer will have a timestamp on a single byte */ + mc->rec_ts_idx++; + /* read data */ memset(cf->data, 0x0, sizeof(cf->data)); if (status_len & PCAN_USB_STATUSLEN_RTR) { @@ -688,7 +696,6 @@ static int pcan_usb_decode_msg(struct peak_usb_device *dev, u8 *ibuf, u32 lbuf) /* handle normal can frames here */ } else { err = pcan_usb_decode_data(&mc, sl); - mc.rec_data_idx++; } } -- cgit v1.2.3 From f7a1337f0d29b98733c8824e165fca3371d7d4fd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 23 Oct 2019 10:27:05 +0200 Subject: can: peak_usb: 
fix slab info leak Fix a small slab info leak due to a failure to clear the command buffer at allocation. The first 16 bytes of the command buffer are always sent to the device in pcan_usb_send_cmd() even though only the first two may have been initialised in case no argument payload is provided (e.g. when waiting for a response). Fixes: bb4785551f64 ("can: usb: PEAK-System Technik USB adapters driver core") Cc: stable # 3.4 Reported-by: syzbot+863724e7128e14b26732@syzkaller.appspotmail.com Signed-off-by: Johan Hovold Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 65dce642b86b..0b7766b715fd 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -750,7 +750,7 @@ static int peak_usb_create_dev(const struct peak_usb_adapter *peak_usb_adapter, dev = netdev_priv(netdev); /* allocate a buffer large enough to send commands */ - dev->cmd_buf = kmalloc(PCAN_USB_MAX_CMD_LEN, GFP_KERNEL); + dev->cmd_buf = kzalloc(PCAN_USB_MAX_CMD_LEN, GFP_KERNEL); if (!dev->cmd_buf) { err = -ENOMEM; goto lbl_free_candev; -- cgit v1.2.3 From 128a1b87d3ceb2ba449d5aadb222fe22395adeb0 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Wed, 25 Sep 2019 08:58:45 +0000 Subject: can: peak_usb: report bus recovery as well While the state changes are reported when the error counters increase and decrease, there is no event when the bus recovers and the error counters decrease again. So add those as well. Change the state going downward to be ERROR_PASSIVE -> ERROR_WARNING -> ERROR_ACTIVE instead of directly to ERROR_ACTIVE again. 
Signed-off-by: Jeroen Hofstee Cc: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 5a66c9f53aae..d2539c95adb6 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -436,8 +436,8 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n, } if ((n & PCAN_USB_ERROR_BUS_LIGHT) == 0) { /* no error (back to active state) */ - mc->pdev->dev.can.state = CAN_STATE_ERROR_ACTIVE; - return 0; + new_state = CAN_STATE_ERROR_ACTIVE; + break; } break; @@ -460,9 +460,9 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n, } if ((n & PCAN_USB_ERROR_BUS_HEAVY) == 0) { - /* no error (back to active state) */ - mc->pdev->dev.can.state = CAN_STATE_ERROR_ACTIVE; - return 0; + /* no error (back to warning state) */ + new_state = CAN_STATE_ERROR_WARNING; + break; } break; @@ -501,6 +501,11 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n, mc->pdev->dev.can.can_stats.error_warning++; break; + case CAN_STATE_ERROR_ACTIVE: + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] = CAN_ERR_CRTL_ACTIVE; + break; + default: /* CAN_STATE_MAX (trick to handle other errors) */ cf->can_id |= CAN_ERR_CRTL; -- cgit v1.2.3 From 3cb3eaac52c0f145d895f4b6c22834d5f02b8569 Mon Sep 17 00:00:00 2001 From: Kurt Van Dijck Date: Tue, 1 Oct 2019 09:40:36 +0200 Subject: can: c_can: c_can_poll(): only read status register after status IRQ When the status register is read without the status IRQ pending, the chip may not raise the interrupt line for an upcoming status interrupt and the driver may miss a status interrupt. It is critical that the BUSOFF status interrupt is forwarded to the higher layers, since no more interrupts will follow without intervention. 
Thanks to Wolfgang and Joe for bringing up the first idea. Signed-off-by: Kurt Van Dijck Cc: Wolfgang Grandegger Cc: Joe Burmeister Fixes: fa39b54ccf28 ("can: c_can: Get rid of pointless interrupts") Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 25 ++++++++++++++++++++----- drivers/net/can/c_can/c_can.h | 1 + 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 606b7d8ffe13..9b61bfbea6cd 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -97,6 +97,9 @@ #define BTR_TSEG2_SHIFT 12 #define BTR_TSEG2_MASK (0x7 << BTR_TSEG2_SHIFT) +/* interrupt register */ +#define INT_STS_PENDING 0x8000 + /* brp extension register */ #define BRP_EXT_BRPE_MASK 0x0f #define BRP_EXT_BRPE_SHIFT 0 @@ -1029,10 +1032,16 @@ static int c_can_poll(struct napi_struct *napi, int quota) u16 curr, last = priv->last_status; int work_done = 0; - priv->last_status = curr = priv->read_reg(priv, C_CAN_STS_REG); - /* Ack status on C_CAN. D_CAN is self clearing */ - if (priv->type != BOSCH_D_CAN) - priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED); + /* Only read the status register if a status interrupt was pending */ + if (atomic_xchg(&priv->sie_pending, 0)) { + priv->last_status = curr = priv->read_reg(priv, C_CAN_STS_REG); + /* Ack status on C_CAN. D_CAN is self clearing */ + if (priv->type != BOSCH_D_CAN) + priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED); + } else { + /* no change detected ... 
*/ + curr = last; + } /* handle state changes */ if ((curr & STATUS_EWARN) && (!(last & STATUS_EWARN))) { @@ -1083,10 +1092,16 @@ static irqreturn_t c_can_isr(int irq, void *dev_id) { struct net_device *dev = (struct net_device *)dev_id; struct c_can_priv *priv = netdev_priv(dev); + int reg_int; - if (!priv->read_reg(priv, C_CAN_INT_REG)) + reg_int = priv->read_reg(priv, C_CAN_INT_REG); + if (!reg_int) return IRQ_NONE; + /* save for later use */ + if (reg_int & INT_STS_PENDING) + atomic_set(&priv->sie_pending, 1); + /* disable all interrupts and schedule the NAPI */ c_can_irq_control(priv, false); napi_schedule(&priv->napi); diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h index 8acdc7fa4792..d5567a7c1c6d 100644 --- a/drivers/net/can/c_can/c_can.h +++ b/drivers/net/can/c_can/c_can.h @@ -198,6 +198,7 @@ struct c_can_priv { struct net_device *dev; struct device *device; atomic_t tx_active; + atomic_t sie_pending; unsigned long tx_dir; int last_status; u16 (*read_reg) (const struct c_can_priv *priv, enum reg index); -- cgit v1.2.3 From 23c5a9488f076bab336177cd1d1a366bd8ddf087 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 1 Oct 2019 21:01:20 +0000 Subject: can: c_can: D_CAN: c_can_chip_config(): perform a software reset on open When the CAN interface is closed, the hardware is put in power down mode, but the error counters / state are not reset. Reset the D_CAN on open, so the reported state and the actual state match. According to [1], the C_CAN module doesn't have the software reset.
[1] http://www.bosch-semiconductors.com/media/ip_modules/pdf_2/c_can_fd8/users_manual_c_can_fd8_r210_1.pdf Signed-off-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 9b61bfbea6cd..24c6015f6c92 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -52,6 +52,7 @@ #define CONTROL_EX_PDR BIT(8) /* control register */ +#define CONTROL_SWR BIT(15) #define CONTROL_TEST BIT(7) #define CONTROL_CCE BIT(6) #define CONTROL_DISABLE_AR BIT(5) @@ -572,6 +573,26 @@ static void c_can_configure_msg_objects(struct net_device *dev) IF_MCONT_RCV_EOB); } +static int c_can_software_reset(struct net_device *dev) +{ + struct c_can_priv *priv = netdev_priv(dev); + int retry = 0; + + if (priv->type != BOSCH_D_CAN) + return 0; + + priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_SWR | CONTROL_INIT); + while (priv->read_reg(priv, C_CAN_CTRL_REG) & CONTROL_SWR) { + msleep(20); + if (retry++ > 100) { + netdev_err(dev, "CCTRL: software reset failed\n"); + return -EIO; + } + } + + return 0; +} + /* * Configure C_CAN chip: * - enable/disable auto-retransmission @@ -581,6 +602,11 @@ static void c_can_configure_msg_objects(struct net_device *dev) static int c_can_chip_config(struct net_device *dev) { struct c_can_priv *priv = netdev_priv(dev); + int err; + + err = c_can_software_reset(dev); + if (err) + return err; /* enable automatic retransmission */ priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_ENABLE_AR); -- cgit v1.2.3 From 6f12001ad5e79d0a0b08c599731d45c34cafd376 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 1 Oct 2019 21:01:24 +0000 Subject: can: c_can: C_CAN: add bus recovery events While the state is updated when the error counters increase and decrease, there is no event when the bus recovers and the error counters decrease again. So add that event as well. 
Change the state going downward to be ERROR_PASSIVE -> ERROR_WARNING -> ERROR_ACTIVE instead of directly to ERROR_ACTIVE again. Signed-off-by: Jeroen Hofstee Acked-by: Kurt Van Dijck Tested-by: Kurt Van Dijck Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 24c6015f6c92..8e9f5620c9a2 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -915,6 +915,9 @@ static int c_can_handle_state_change(struct net_device *dev, struct can_berr_counter bec; switch (error_type) { + case C_CAN_NO_ERROR: + priv->can.state = CAN_STATE_ERROR_ACTIVE; + break; case C_CAN_ERROR_WARNING: /* error warning state */ priv->can.can_stats.error_warning++; @@ -945,6 +948,13 @@ static int c_can_handle_state_change(struct net_device *dev, ERR_CNT_RP_SHIFT; switch (error_type) { + case C_CAN_NO_ERROR: + /* error warning state */ + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] = CAN_ERR_CRTL_ACTIVE; + cf->data[6] = bec.txerr; + cf->data[7] = bec.rxerr; + break; case C_CAN_ERROR_WARNING: /* error warning state */ cf->can_id |= CAN_ERR_CRTL; @@ -1089,11 +1099,17 @@ static int c_can_poll(struct napi_struct *napi, int quota) /* handle bus recovery events */ if ((!(curr & STATUS_BOFF)) && (last & STATUS_BOFF)) { netdev_dbg(dev, "left bus off state\n"); - priv->can.state = CAN_STATE_ERROR_ACTIVE; + work_done += c_can_handle_state_change(dev, C_CAN_ERROR_PASSIVE); } + if ((!(curr & STATUS_EPASS)) && (last & STATUS_EPASS)) { netdev_dbg(dev, "left error passive state\n"); - priv->can.state = CAN_STATE_ERROR_ACTIVE; + work_done += c_can_handle_state_change(dev, C_CAN_ERROR_WARNING); + } + + if ((!(curr & STATUS_EWARN)) && (last & STATUS_EWARN)) { + netdev_dbg(dev, "left error warning state\n"); + work_done += c_can_handle_state_change(dev, C_CAN_NO_ERROR); } /* handle lec errors on the bus */ -- cgit v1.2.3 From 
659680bc232ff29cd6aea8df58115775ac365565 Mon Sep 17 00:00:00 2001 From: Appana Durga Kedareswara rao Date: Wed, 9 Oct 2019 12:59:47 +0530 Subject: can: xilinx_can: Fix flags field initialization for axi can AXI CANIP doesn't support tx fifo empty interrupt feature (TXFEMP), update the flags field in the driver for AXI CAN case accordingly. Fixes: 3281b380ec9f ("can: xilinx_can: Fix flags field initialization for axi can and canps") Reported-by: Anssi Hannula Signed-off-by: Appana Durga Kedareswara rao Signed-off-by: Marc Kleine-Budde --- drivers/net/can/xilinx_can.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 911b34316c9d..7c482b2d78d2 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -1599,7 +1599,6 @@ static const struct xcan_devtype_data xcan_zynq_data = { static const struct xcan_devtype_data xcan_axi_data = { .cantype = XAXI_CAN, - .flags = XCAN_FLAG_TXFEMP, .bittiming_const = &xcan_bittiming_const, .btr_ts2_shift = XCAN_BTR_TS2_SHIFT, .btr_sjw_shift = XCAN_BTR_SJW_SHIFT, -- cgit v1.2.3 From ca913f1ac024559ebc17f0b599af262f0ad997c9 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 9 Oct 2019 15:48:48 +0200 Subject: can: rx-offload: can_rx_offload_queue_sorted(): fix error handling, avoid skb mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the rx-offload skb_queue is full can_rx_offload_queue_sorted() will not queue the skb and return with an error. None of the callers of this function issue a kfree_skb() to free the not queued skb. This results in a memory leak. This patch fixes the problem by freeing the skb in case of a full queue. The return value is adjusted to -ENOBUFS to better reflect the actual problem. The device stats handling is left to the callers, as this function might be used in both the rx and tx path.
Fixes: 55059f2b7f86 ("can: rx-offload: introduce can_rx_offload_get_echo_skb() and can_rx_offload_queue_sorted() functions") Cc: linux-stable Cc: Martin Hundebøll Reported-by: Martin Hundebøll Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rx-offload.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index e6a668ee7730..663697439d1c 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -207,8 +207,10 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload, unsigned long flags; if (skb_queue_len(&offload->skb_queue) > - offload->skb_queue_len_max) - return -ENOMEM; + offload->skb_queue_len_max) { + kfree_skb(skb); + return -ENOBUFS; + } cb = can_rx_offload_get_cb(skb); cb->timestamp = timestamp; -- cgit v1.2.3 From 6caf8a6d6586d44fd72f4aa1021d14aa82affafb Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 9 Oct 2019 15:48:48 +0200 Subject: can: rx-offload: can_rx_offload_queue_tail(): fix error handling, avoid skb mem leak If the rx-offload skb_queue is full can_rx_offload_queue_tail() will not queue the skb and return with an error. This patch frees the skb in case of a full queue, which brings can_rx_offload_queue_tail() in line with the can_rx_offload_queue_sorted() function, which has been adjusted in the previous patch. The return value is adjusted to -ENOBUFS to better reflect the actual problem. The device stats handling is left to the caller. 
Fixes: d254586c3453 ("can: rx-offload: Add support for HW fifo based irq offloading") Reported-by: Kurt Van Dijck Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rx-offload.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index 663697439d1c..d1c863409945 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -252,8 +252,10 @@ int can_rx_offload_queue_tail(struct can_rx_offload *offload, struct sk_buff *skb) { if (skb_queue_len(&offload->skb_queue) > - offload->skb_queue_len_max) - return -ENOMEM; + offload->skb_queue_len_max) { + kfree_skb(skb); + return -ENOBUFS; + } skb_queue_tail(&offload->skb_queue, skb); can_rx_offload_schedule(offload); -- cgit v1.2.3 From a2dc3f5e1022a5ede8af9ab89a144f1e69db8636 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 9 Oct 2019 16:03:18 +0200 Subject: can: rx-offload: can_rx_offload_offload_one(): do not increase the skb_queue beyond skb_queue_len_max The skb_queue is a linked list, holding the skb to be processed in the next NAPI call. Without this patch, the queue length in can_rx_offload_offload_one() is limited to skb_queue_len_max + 1. As the skb_queue is a linked list, no array or other resources are accessed out-of-bound, however this behaviour is counterintuitive. This patch limits the rx-offload skb_queue length to skb_queue_len_max. 
Fixes: d254586c3453 ("can: rx-offload: Add support for HW fifo based irq offloading") Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rx-offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index d1c863409945..bdc27481b57f 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -115,7 +115,7 @@ static struct sk_buff *can_rx_offload_offload_one(struct can_rx_offload *offload int ret; /* If queue is full or skb not available, read to discard mailbox */ - if (likely(skb_queue_len(&offload->skb_queue) <= + if (likely(skb_queue_len(&offload->skb_queue) < offload->skb_queue_len_max)) skb = alloc_can_skb(offload->dev, &cf); -- cgit v1.2.3 From 4e9016bee3bf0c24963097edace034ff205b565c Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 9 Oct 2019 15:15:07 +0200 Subject: can: rx-offload: can_rx_offload_offload_one(): increment rx_fifo_errors on queue overflow or OOM If the rx-offload skb_queue is full or the skb allocation fails (due to OOM), the mailbox contents is discarded. This patch adds the incrementing of the rx_fifo_errors statistics counter. 
Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rx-offload.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index bdc27481b57f..e224530a0630 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -125,8 +125,10 @@ static struct sk_buff *can_rx_offload_offload_one(struct can_rx_offload *offload ret = offload->mailbox_read(offload, &cf_overflow, &timestamp, n); - if (ret) + if (ret) { offload->dev->stats.rx_dropped++; + offload->dev->stats.rx_fifo_errors++; + } return NULL; } -- cgit v1.2.3 From d763ab3044f0bf50bd0e6179f6b2cf1c125d1d94 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 9 Oct 2019 21:00:32 +0200 Subject: can: rx-offload: can_rx_offload_offload_one(): use ERR_PTR() to propagate error value in case of errors Before this patch can_rx_offload_offload_one() returns a pointer to a skb containing the read CAN frame or a NULL pointer. However the meaning of the NULL pointer is ambiguous, it can either mean the requested mailbox is empty or there was an error. This patch fixes this situation by returning: - pointer to skb on success - NULL pointer if mailbox is empty - ERR_PTR() in case of an error All users of can_rx_offload_offload_one() have been adapted, no functional change intended.
Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rx-offload.c | 86 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 13 deletions(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index e224530a0630..3f5e040f0c71 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -107,39 +107,95 @@ static int can_rx_offload_compare(struct sk_buff *a, struct sk_buff *b) return cb_b->timestamp - cb_a->timestamp; } -static struct sk_buff *can_rx_offload_offload_one(struct can_rx_offload *offload, unsigned int n) +/** + * can_rx_offload_offload_one() - Read one CAN frame from HW + * @offload: pointer to rx_offload context + * @n: number of mailbox to read + * + * The task of this function is to read a CAN frame from mailbox @n + * from the device and return the mailbox's content as a struct + * sk_buff. + * + * If the struct can_rx_offload::skb_queue exceeds the maximal queue + * length (struct can_rx_offload::skb_queue_len_max) or no skb can be + * allocated, the mailbox contents is discarded by reading it into an + * overflow buffer. This way the mailbox is marked as free by the + * driver. + * + * Return: A pointer to skb containing the CAN frame on success. + * + * NULL if the mailbox @n is empty. 
+ * + * ERR_PTR() in case of an error + */ +static struct sk_buff * +can_rx_offload_offload_one(struct can_rx_offload *offload, unsigned int n) { - struct sk_buff *skb = NULL; + struct sk_buff *skb = NULL, *skb_error = NULL; struct can_rx_offload_cb *cb; struct can_frame *cf; int ret; - /* If queue is full or skb not available, read to discard mailbox */ if (likely(skb_queue_len(&offload->skb_queue) < - offload->skb_queue_len_max)) + offload->skb_queue_len_max)) { skb = alloc_can_skb(offload->dev, &cf); + if (unlikely(!skb)) + skb_error = ERR_PTR(-ENOMEM); /* skb alloc failed */ + } else { + skb_error = ERR_PTR(-ENOBUFS); /* skb_queue is full */ + } - if (!skb) { + /* If queue is full or skb not available, drop by reading into + * overflow buffer. + */ + if (unlikely(skb_error)) { struct can_frame cf_overflow; u32 timestamp; ret = offload->mailbox_read(offload, &cf_overflow, &timestamp, n); - if (ret) { - offload->dev->stats.rx_dropped++; - offload->dev->stats.rx_fifo_errors++; - } - return NULL; + /* Mailbox was empty. */ + if (unlikely(!ret)) + return NULL; + + /* Mailbox has been read and we're dropping it or + * there was a problem reading the mailbox. + * + * Increment error counters in any case. + */ + offload->dev->stats.rx_dropped++; + offload->dev->stats.rx_fifo_errors++; + + /* There was a problem reading the mailbox, propagate + * error value. + */ + if (unlikely(ret < 0)) + return ERR_PTR(ret); + + return skb_error; } cb = can_rx_offload_get_cb(skb); ret = offload->mailbox_read(offload, cf, &cb->timestamp, n); - if (!ret) { + + /* Mailbox was empty. */ + if (unlikely(!ret)) { kfree_skb(skb); return NULL; } + /* There was a problem reading the mailbox, propagate error value. */ + if (unlikely(ret < 0)) { + kfree_skb(skb); + + offload->dev->stats.rx_dropped++; + offload->dev->stats.rx_fifo_errors++; + + return ERR_PTR(ret); + } + + /* Mailbox was read.
*/ return skb; } @@ -159,7 +215,7 @@ int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 pen continue; skb = can_rx_offload_offload_one(offload, i); - if (!skb) + if (IS_ERR_OR_NULL(skb)) break; __skb_queue_add_sort(&skb_queue, skb, can_rx_offload_compare); @@ -190,7 +246,11 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload) struct sk_buff *skb; int received = 0; - while ((skb = can_rx_offload_offload_one(offload, 0))) { + while (1) { + skb = can_rx_offload_offload_one(offload, 0); + if (IS_ERR_OR_NULL(skb)) + break; + skb_queue_tail(&offload->skb_queue, skb); received++; } -- cgit v1.2.3 From c2a9f74c9d18acfdcabd3361adc7eac82c537a66 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 24 Sep 2019 18:45:38 +0000 Subject: can: rx-offload: can_rx_offload_irq_offload_timestamp(): continue on error In case of a resource shortage, i.e. the rx_offload queue will overflow or a skb fails to be allocated (due to OOM), can_rx_offload_offload_one() will call mailbox_read() to discard the mailbox and return an ERR_PTR. However can_rx_offload_irq_offload_timestamp() bails out in the error case. In case of a resource shortage all mailboxes should be discarded, to avoid an IRQ storm and give the system some time to recover. Since can_rx_offload_irq_offload_timestamp() is typically called from a while loop, all messages will eventually be discarded. So let's continue on error instead to discard them directly.
Signed-off-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rx-offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index 3f5e040f0c71..2ea8676579a9 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -216,7 +216,7 @@ int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 pen skb = can_rx_offload_offload_one(offload, i); if (IS_ERR_OR_NULL(skb)) - break; + continue; __skb_queue_add_sort(&skb_queue, skb, can_rx_offload_compare); } -- cgit v1.2.3 From 1f7f504dcd9d1262437bdcf4fa071e41dec1af03 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 24 Sep 2019 18:45:38 +0000 Subject: can: rx-offload: can_rx_offload_irq_offload_fifo(): continue on error In case of a resource shortage, i.e. the rx_offload queue will overflow or a skb fails to be allocated (due to OOM), can_rx_offload_offload_one() will call mailbox_read() to discard the mailbox and return an ERR_PTR. If the hardware FIFO is empty can_rx_offload_offload_one() will return NULL. In case a CAN frame was read from the hardware, can_rx_offload_offload_one() returns the skb containing it. Without this patch can_rx_offload_irq_offload_fifo() bails out if no skb returned, regardless of the reason. Similar to can_rx_offload_irq_offload_timestamp() in case of a resource shortage the whole FIFO should be discarded, to avoid an IRQ storm and give the system some time to recover. However if the FIFO is empty the loop can be left. With this patch the loop is left in case of empty FIFO, but not on errors. 
Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rx-offload.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index 2ea8676579a9..84cae167e42f 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -248,7 +248,9 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload) while (1) { skb = can_rx_offload_offload_one(offload, 0); - if (IS_ERR_OR_NULL(skb)) + if (IS_ERR(skb)) + continue; + if (!skb) break; skb_queue_tail(&offload->skb_queue, skb); -- cgit v1.2.3 From 758124335a9dd649ab820bfb5b328170919ee7dc Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 15 Jul 2019 20:53:08 +0200 Subject: can: flexcan: increase error counters if skb enqueueing via can_rx_offload_queue_sorted() fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to can_rx_offload_queue_sorted() may fail and return an error (in the current implementation due to resource shortage). The passed skb is consumed. This patch adds incrementing of the appropriate error counters to let the device statistics reflect that there's a problem. 
Reported-by: Martin Hundebøll Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 1cd5179cb876..57f9a2f51085 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -677,6 +677,7 @@ static void flexcan_irq_bus_err(struct net_device *dev, u32 reg_esr) struct can_frame *cf; bool rx_errors = false, tx_errors = false; u32 timestamp; + int err; timestamp = priv->read(&regs->timer) << 16; @@ -725,7 +726,9 @@ static void flexcan_irq_bus_err(struct net_device *dev, u32 reg_esr) if (tx_errors) dev->stats.tx_errors++; - can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); + err = can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); + if (err) + dev->stats.rx_fifo_errors++; } static void flexcan_irq_state(struct net_device *dev, u32 reg_esr) @@ -738,6 +741,7 @@ static void flexcan_irq_state(struct net_device *dev, u32 reg_esr) int flt; struct can_berr_counter bec; u32 timestamp; + int err; timestamp = priv->read(&regs->timer) << 16; @@ -769,7 +773,9 @@ static void flexcan_irq_state(struct net_device *dev, u32 reg_esr) if (unlikely(new_state == CAN_STATE_BUS_OFF)) can_bus_off(dev); - can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); + err = can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); + if (err) + dev->stats.rx_fifo_errors++; } static inline struct flexcan_priv *rx_offload_to_priv(struct can_rx_offload *offload) -- cgit v1.2.3 From c4409e9fbea954fdae7927205283dfc3ed8e2d6e Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 15 Jul 2019 20:53:08 +0200 Subject: can: ti_hecc: ti_hecc_error(): increase error counters if skb enqueueing via can_rx_offload_queue_sorted() fails The call to can_rx_offload_queue_sorted() may fail and return an error (in the current implementation due to resource shortage). The passed skb is consumed.
This patch adds incrementing of the appropriate error counters to let the device statistics reflect that there's a problem. Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index f8b19eef5d26..91188e6d4f78 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -558,6 +558,7 @@ static int ti_hecc_error(struct net_device *ndev, int int_status, struct can_frame *cf; struct sk_buff *skb; u32 timestamp; + int err; /* propagate the error condition to the can stack */ skb = alloc_can_err_skb(ndev, &cf); @@ -639,7 +640,9 @@ static int ti_hecc_error(struct net_device *ndev, int int_status, } timestamp = hecc_read(priv, HECC_CANLNT); - can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); + err = can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); + if (err) + ndev->stats.rx_fifo_errors++; return 0; } -- cgit v1.2.3 From 59f415c2f5e20a6859e49626e8af4de983ff111c Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 24 Sep 2019 18:45:52 +0000 Subject: can: ti_hecc: ti_hecc_stop(): stop the CPK on down When the interface goes down, the CPK should no longer take an active part in the CAN-bus communication, like sending acks and error frames. So enable configuration mode in ti_hecc_stop, so the CPK is no longer active. When a transceiver switch is present the acks and errors don't make it to the bus, but disabling the CPK then does prevent oddities, like ti_hecc_reset() failing, since the CPK can become bus-off and starts counting the 11 bit recessive bits, which seems to block the reset. It can also cause invalid interrupts and disrupt the CAN-bus, since transmission can be stopped in the middle of a message, by disabling the transceiver while the CPK is sending. Since the CPK is disabled after normal power on, it is typically only seen when the interface is restarted.
Signed-off-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 91188e6d4f78..eb8151154083 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -400,6 +400,9 @@ static void ti_hecc_stop(struct net_device *ndev) { struct ti_hecc_priv *priv = netdev_priv(ndev); + /* Disable the CPK; stop sending, erroring and acking */ + hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_CCR); + /* Disable interrupts and disable mailboxes */ hecc_write(priv, HECC_CANGIM, 0); hecc_write(priv, HECC_CANMIM, 0); -- cgit v1.2.3 From 10f5d55ddcbf1c30529d90beffedcf84844d6f42 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 24 Sep 2019 18:45:56 +0000 Subject: can: ti_hecc: keep MIM and MD set The HECC_CANMIM is set in the xmit path and cleared in the interrupt. Since this is done with a read, modify, write action the register might end up with some more MIM enabled than intended, since it is not protected. That doesn't matter at all, since the tx interrupt disables the mailbox with HECC_CANME (while holding a spinlock). So let's just always keep MIM set. While at it, since the mailbox direction never changes, don't set it every time a message is sent, ti_hecc_reset() already sets them to tx.
Signed-off-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index eb8151154083..d6a84f8ff863 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -382,6 +382,9 @@ static void ti_hecc_start(struct net_device *ndev) hecc_set_bit(priv, HECC_CANMIM, mbx_mask); } + /* Enable tx interrupts */ + hecc_set_bit(priv, HECC_CANMIM, BIT(HECC_MAX_TX_MBOX) - 1); + /* Prevent message over-write & Enable interrupts */ hecc_write(priv, HECC_CANOPC, HECC_SET_REG); if (priv->use_hecc1int) { @@ -511,8 +514,6 @@ static netdev_tx_t ti_hecc_xmit(struct sk_buff *skb, struct net_device *ndev) hecc_set_bit(priv, HECC_CANME, mbx_mask); spin_unlock_irqrestore(&priv->mbx_lock, flags); - hecc_clear_bit(priv, HECC_CANMD, mbx_mask); - hecc_set_bit(priv, HECC_CANMIM, mbx_mask); hecc_write(priv, HECC_CANTRS, mbx_mask); return NETDEV_TX_OK; @@ -676,7 +677,6 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) mbx_mask = BIT(mbxno); if (!(mbx_mask & hecc_read(priv, HECC_CANTA))) break; - hecc_clear_bit(priv, HECC_CANMIM, mbx_mask); hecc_write(priv, HECC_CANTA, mbx_mask); spin_lock_irqsave(&priv->mbx_lock, flags); hecc_clear_bit(priv, HECC_CANME, mbx_mask); -- cgit v1.2.3 From 99383749c25954c23c87e1592f6b49b216e0a2e2 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 24 Sep 2019 18:45:49 +0000 Subject: can: ti_hecc: release the mailbox a bit earlier Release the mailbox after reading it, so it can be reused a bit earlier. Since "can: rx-offload: continue on error" all pending message bits are cleared directly, so remove clearing them in ti_hecc. 
Suggested-by: Marc Kleine-Budde Signed-off-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index d6a84f8ff863..6ea29126c60b 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -530,8 +530,9 @@ static unsigned int ti_hecc_mailbox_read(struct can_rx_offload *offload, u32 *timestamp, unsigned int mbxno) { struct ti_hecc_priv *priv = rx_offload_to_priv(offload); - u32 data; + u32 data, mbx_mask; + mbx_mask = BIT(mbxno); data = hecc_read_mbx(priv, mbxno, HECC_CANMID); if (data & HECC_CANMID_IDE) cf->can_id = (data & CAN_EFF_MASK) | CAN_EFF_FLAG; @@ -551,6 +552,7 @@ static unsigned int ti_hecc_mailbox_read(struct can_rx_offload *offload, } *timestamp = hecc_read_stamp(priv, mbxno); + hecc_write(priv, HECC_CANRMP, mbx_mask); return 1; } @@ -701,7 +703,6 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) while ((rx_pending = hecc_read(priv, HECC_CANRMP))) { can_rx_offload_irq_offload_timestamp(&priv->offload, rx_pending); - hecc_write(priv, HECC_CANRMP, rx_pending); } } -- cgit v1.2.3 From 678d85ed8554e1d6c9720ebcab785eea8fe0d4ef Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 24 Sep 2019 18:46:00 +0000 Subject: can: ti_hecc: add fifo overflow error reporting When the rx FIFO overflows the ti_hecc would silently drop them since the overwrite protection is enabled for all mailboxes. So disable it for the lowest priority mailbox and return a proper error value when receive message lost is set. Drop the message itself in that case, since it might be partially updated. 
Signed-off-by: Jeroen Hofstee Acked-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 6ea29126c60b..b12fd0bd489d 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -73,6 +73,7 @@ MODULE_VERSION(HECC_MODULE_VERSION); */ #define HECC_MAX_RX_MBOX (HECC_MAX_MAILBOXES - HECC_MAX_TX_MBOX) #define HECC_RX_FIRST_MBOX (HECC_MAX_MAILBOXES - 1) +#define HECC_RX_LAST_MBOX (HECC_MAX_TX_MBOX) /* TI HECC module registers */ #define HECC_CANME 0x0 /* Mailbox enable */ @@ -82,7 +83,7 @@ MODULE_VERSION(HECC_MODULE_VERSION); #define HECC_CANTA 0x10 /* Transmission acknowledge */ #define HECC_CANAA 0x14 /* Abort acknowledge */ #define HECC_CANRMP 0x18 /* Receive message pending */ -#define HECC_CANRML 0x1C /* Remote message lost */ +#define HECC_CANRML 0x1C /* Receive message lost */ #define HECC_CANRFP 0x20 /* Remote frame pending */ #define HECC_CANGAM 0x24 /* SECC only:Global acceptance mask */ #define HECC_CANMC 0x28 /* Master control */ @@ -385,8 +386,15 @@ static void ti_hecc_start(struct net_device *ndev) /* Enable tx interrupts */ hecc_set_bit(priv, HECC_CANMIM, BIT(HECC_MAX_TX_MBOX) - 1); - /* Prevent message over-write & Enable interrupts */ - hecc_write(priv, HECC_CANOPC, HECC_SET_REG); + /* Prevent message over-write to create a rx fifo, but not for + * the lowest priority mailbox, since that allows detecting + * overflows instead of the hardware silently dropping the + * messages. 
+ */ + mbx_mask = ~BIT(HECC_RX_LAST_MBOX); + hecc_write(priv, HECC_CANOPC, mbx_mask); + + /* Enable interrupts */ if (priv->use_hecc1int) { hecc_write(priv, HECC_CANMIL, HECC_SET_REG); hecc_write(priv, HECC_CANGIM, HECC_CANGIM_DEF_MASK | @@ -531,6 +539,7 @@ static unsigned int ti_hecc_mailbox_read(struct can_rx_offload *offload, { struct ti_hecc_priv *priv = rx_offload_to_priv(offload); u32 data, mbx_mask; + int ret = 1; mbx_mask = BIT(mbxno); data = hecc_read_mbx(priv, mbxno, HECC_CANMID); @@ -552,9 +561,26 @@ static unsigned int ti_hecc_mailbox_read(struct can_rx_offload *offload, } *timestamp = hecc_read_stamp(priv, mbxno); + + /* Check for FIFO overrun. + * + * All but the last RX mailbox have activated overwrite + * protection. So skip check for overrun, if we're not + * handling the last RX mailbox. + * + * As the overwrite protection for the last RX mailbox is + * disabled, the CAN core might update while we're reading + * it. This means the skb might be inconsistent. + * + * Return an error to let rx-offload discard this CAN frame. 
+ */ + if (unlikely(mbxno == HECC_RX_LAST_MBOX && + hecc_read(priv, HECC_CANRML) & mbx_mask)) + ret = -ENOBUFS; + hecc_write(priv, HECC_CANRMP, mbx_mask); - return 1; + return ret; } static int ti_hecc_error(struct net_device *ndev, int int_status, @@ -884,7 +910,7 @@ static int ti_hecc_probe(struct platform_device *pdev) priv->offload.mailbox_read = ti_hecc_mailbox_read; priv->offload.mb_first = HECC_RX_FIRST_MBOX; - priv->offload.mb_last = HECC_MAX_TX_MBOX; + priv->offload.mb_last = HECC_RX_LAST_MBOX; err = can_rx_offload_add_timestamp(ndev, &priv->offload); if (err) { dev_err(&pdev->dev, "can_rx_offload_add_timestamp() failed\n"); -- cgit v1.2.3 From 3b2d652da21450aba19d299a75fe3a6f5d4003ff Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 24 Sep 2019 18:46:03 +0000 Subject: can: ti_hecc: properly report state changes The HECC_CANES register handles the flags specially, it only updates the flags after a one is written to them. Since the interrupt for frame errors is not enabled an old error can hence be seen when a state interrupt arrives. For example if the device is not connected to the CAN-bus the error warning interrupt will have HECC_CANES indicating there is no ack. The error passive interrupt thereafter will have HECC_CANES flagging that there is a warning level. And if thereafter there is a message successfully sent, HECC_CANES points to an error passive event, while in reality it became error warning again. In summary, the state is not always reported correctly. So handle the state changes and frame errors separately. The state changes are now based on the interrupt flags and handled directly when they occur. The reporting of the frame errors is still done as before, as a side effect of another interrupt. note: the hecc_clear_bit will do a read, modify, write. So it will not only clear the bit, but also reset all other bits being set as a side effect, hence it is replaced with only clearing the flags.
note: The HECC_CANMC_CCR is no longer cleared in the state change interrupt, it is completely unrelated. And use net_ratelimit to make checkpatch happy. Signed-off-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 162 +++++++++++++++++++++++++--------------------- 1 file changed, 88 insertions(+), 74 deletions(-) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index b12fd0bd489d..4c6d3ce0e8c4 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -150,6 +150,8 @@ MODULE_VERSION(HECC_MODULE_VERSION); #define HECC_BUS_ERROR (HECC_CANES_FE | HECC_CANES_BE |\ HECC_CANES_CRCE | HECC_CANES_SE |\ HECC_CANES_ACKE) +#define HECC_CANES_FLAGS (HECC_BUS_ERROR | HECC_CANES_BO |\ + HECC_CANES_EP | HECC_CANES_EW) #define HECC_CANMCF_RTR BIT(4) /* Remote transmit request */ @@ -592,91 +594,69 @@ static int ti_hecc_error(struct net_device *ndev, int int_status, u32 timestamp; int err; - /* propagate the error condition to the can stack */ - skb = alloc_can_err_skb(ndev, &cf); - if (!skb) { - if (printk_ratelimit()) - netdev_err(priv->ndev, - "%s: alloc_can_err_skb() failed\n", - __func__); - return -ENOMEM; - } - - if (int_status & HECC_CANGIF_WLIF) { /* warning level int */ - if ((int_status & HECC_CANGIF_BOIF) == 0) { - priv->can.state = CAN_STATE_ERROR_WARNING; - ++priv->can.can_stats.error_warning; - cf->can_id |= CAN_ERR_CRTL; - if (hecc_read(priv, HECC_CANTEC) > 96) - cf->data[1] |= CAN_ERR_CRTL_TX_WARNING; - if (hecc_read(priv, HECC_CANREC) > 96) - cf->data[1] |= CAN_ERR_CRTL_RX_WARNING; - } - hecc_set_bit(priv, HECC_CANES, HECC_CANES_EW); - netdev_dbg(priv->ndev, "Error Warning interrupt\n"); - hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR); - } - - if (int_status & HECC_CANGIF_EPIF) { /* error passive int */ - if ((int_status & HECC_CANGIF_BOIF) == 0) { - priv->can.state = CAN_STATE_ERROR_PASSIVE; - ++priv->can.can_stats.error_passive; - cf->can_id |= CAN_ERR_CRTL; - if (hecc_read(priv, HECC_CANTEC) 
> 127) - cf->data[1] |= CAN_ERR_CRTL_TX_PASSIVE; - if (hecc_read(priv, HECC_CANREC) > 127) - cf->data[1] |= CAN_ERR_CRTL_RX_PASSIVE; + if (err_status & HECC_BUS_ERROR) { + /* propagate the error condition to the can stack */ + skb = alloc_can_err_skb(ndev, &cf); + if (!skb) { + if (net_ratelimit()) + netdev_err(priv->ndev, + "%s: alloc_can_err_skb() failed\n", + __func__); + return -ENOMEM; } - hecc_set_bit(priv, HECC_CANES, HECC_CANES_EP); - netdev_dbg(priv->ndev, "Error passive interrupt\n"); - hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR); - } - - /* Need to check busoff condition in error status register too to - * ensure warning interrupts don't hog the system - */ - if ((int_status & HECC_CANGIF_BOIF) || (err_status & HECC_CANES_BO)) { - priv->can.state = CAN_STATE_BUS_OFF; - cf->can_id |= CAN_ERR_BUSOFF; - hecc_set_bit(priv, HECC_CANES, HECC_CANES_BO); - hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR); - /* Disable all interrupts in bus-off to avoid int hog */ - hecc_write(priv, HECC_CANGIM, 0); - ++priv->can.can_stats.bus_off; - can_bus_off(ndev); - } - if (err_status & HECC_BUS_ERROR) { ++priv->can.can_stats.bus_error; cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT; - if (err_status & HECC_CANES_FE) { - hecc_set_bit(priv, HECC_CANES, HECC_CANES_FE); + if (err_status & HECC_CANES_FE) cf->data[2] |= CAN_ERR_PROT_FORM; - } - if (err_status & HECC_CANES_BE) { - hecc_set_bit(priv, HECC_CANES, HECC_CANES_BE); + if (err_status & HECC_CANES_BE) cf->data[2] |= CAN_ERR_PROT_BIT; - } - if (err_status & HECC_CANES_SE) { - hecc_set_bit(priv, HECC_CANES, HECC_CANES_SE); + if (err_status & HECC_CANES_SE) cf->data[2] |= CAN_ERR_PROT_STUFF; - } - if (err_status & HECC_CANES_CRCE) { - hecc_set_bit(priv, HECC_CANES, HECC_CANES_CRCE); + if (err_status & HECC_CANES_CRCE) cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; - } - if (err_status & HECC_CANES_ACKE) { - hecc_set_bit(priv, HECC_CANES, HECC_CANES_ACKE); + if (err_status & HECC_CANES_ACKE) cf->data[3] = CAN_ERR_PROT_LOC_ACK; - 
} + + timestamp = hecc_read(priv, HECC_CANLNT); + err = can_rx_offload_queue_sorted(&priv->offload, skb, + timestamp); + if (err) + ndev->stats.rx_fifo_errors++; + } + + hecc_write(priv, HECC_CANES, HECC_CANES_FLAGS); + + return 0; +} + +static void ti_hecc_change_state(struct net_device *ndev, + enum can_state rx_state, + enum can_state tx_state) +{ + struct ti_hecc_priv *priv = netdev_priv(ndev); + struct can_frame *cf; + struct sk_buff *skb; + u32 timestamp; + int err; + + skb = alloc_can_err_skb(priv->ndev, &cf); + if (unlikely(!skb)) { + priv->can.state = max(tx_state, rx_state); + return; + } + + can_change_state(priv->ndev, cf, tx_state, rx_state); + + if (max(tx_state, rx_state) != CAN_STATE_BUS_OFF) { + cf->data[6] = hecc_read(priv, HECC_CANTEC); + cf->data[7] = hecc_read(priv, HECC_CANREC); } timestamp = hecc_read(priv, HECC_CANLNT); err = can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); if (err) ndev->stats.rx_fifo_errors++; - - return 0; } static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) @@ -686,6 +666,7 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) struct net_device_stats *stats = &ndev->stats; u32 mbxno, mbx_mask, int_status, err_status, stamp; unsigned long flags, rx_pending; + u32 handled = 0; int_status = hecc_read(priv, priv->use_hecc1int ? @@ -695,10 +676,43 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) return IRQ_NONE; err_status = hecc_read(priv, HECC_CANES); - if (err_status & (HECC_BUS_ERROR | HECC_CANES_BO | - HECC_CANES_EP | HECC_CANES_EW)) + if (unlikely(err_status & HECC_CANES_FLAGS)) ti_hecc_error(ndev, int_status, err_status); + if (unlikely(int_status & HECC_CANGIM_DEF_MASK)) { + enum can_state rx_state, tx_state; + u32 rec = hecc_read(priv, HECC_CANREC); + u32 tec = hecc_read(priv, HECC_CANTEC); + + if (int_status & HECC_CANGIF_WLIF) { + handled |= HECC_CANGIF_WLIF; + rx_state = rec >= tec ? CAN_STATE_ERROR_WARNING : 0; + tx_state = rec <= tec ? 
CAN_STATE_ERROR_WARNING : 0; + netdev_dbg(priv->ndev, "Error Warning interrupt\n"); + ti_hecc_change_state(ndev, rx_state, tx_state); + } + + if (int_status & HECC_CANGIF_EPIF) { + handled |= HECC_CANGIF_EPIF; + rx_state = rec >= tec ? CAN_STATE_ERROR_PASSIVE : 0; + tx_state = rec <= tec ? CAN_STATE_ERROR_PASSIVE : 0; + netdev_dbg(priv->ndev, "Error passive interrupt\n"); + ti_hecc_change_state(ndev, rx_state, tx_state); + } + + if (int_status & HECC_CANGIF_BOIF) { + handled |= HECC_CANGIF_BOIF; + rx_state = CAN_STATE_BUS_OFF; + tx_state = CAN_STATE_BUS_OFF; + netdev_dbg(priv->ndev, "Bus off interrupt\n"); + + /* Disable all interrupts */ + hecc_write(priv, HECC_CANGIM, 0); + can_bus_off(ndev); + ti_hecc_change_state(ndev, rx_state, tx_state); + } + } + if (int_status & HECC_CANGIF_GMIF) { while (priv->tx_tail - priv->tx_head > 0) { mbxno = get_tx_tail_mb(priv); @@ -734,10 +748,10 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) /* clear all interrupt conditions - read back to avoid spurious ints */ if (priv->use_hecc1int) { - hecc_write(priv, HECC_CANGIF1, HECC_SET_REG); + hecc_write(priv, HECC_CANGIF1, handled); int_status = hecc_read(priv, HECC_CANGIF1); } else { - hecc_write(priv, HECC_CANGIF0, HECC_SET_REG); + hecc_write(priv, HECC_CANGIF0, handled); int_status = hecc_read(priv, HECC_CANGIF0); } -- cgit v1.2.3 From b5018be6d5dd9dd257bf8236298daac8b1262750 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 24 Sep 2019 18:46:06 +0000 Subject: can: ti_hecc: add missing state changes While the ti_hecc has interrupts to report when the error counters increase to a certain level and which change state it doesn't handle the case that the error counters go down again, so the reported state can actually be wrong. Since there is no interrupt for that, do update state based on the error counters, when the state is not error active and goes down again. 
Signed-off-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 4c6d3ce0e8c4..31ad364a89bb 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -711,6 +711,23 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) can_bus_off(ndev); ti_hecc_change_state(ndev, rx_state, tx_state); } + } else if (unlikely(priv->can.state != CAN_STATE_ERROR_ACTIVE)) { + enum can_state new_state, tx_state, rx_state; + u32 rec = hecc_read(priv, HECC_CANREC); + u32 tec = hecc_read(priv, HECC_CANTEC); + + if (rec >= 128 || tec >= 128) + new_state = CAN_STATE_ERROR_PASSIVE; + else if (rec >= 96 || tec >= 96) + new_state = CAN_STATE_ERROR_WARNING; + else + new_state = CAN_STATE_ERROR_ACTIVE; + + if (new_state < priv->can.state) { + rx_state = rec >= tec ? new_state : 0; + tx_state = rec <= tec ? new_state : 0; + ti_hecc_change_state(ndev, rx_state, tx_state); + } } if (int_status & HECC_CANGIF_GMIF) { -- cgit v1.2.3 From db1a804cca6fe0cea9dea888d50dda134713c340 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 18 Sep 2019 11:11:56 +0100 Subject: can: j1939: fix resource leak of skb on error return paths Currently the error return paths do not free skb and this results in a memory leak. Fix this by freeing them before the return. 
Addresses-Coverity: ("Resource leak") Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Colin Ian King Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/socket.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 37c1040bcb9c..5c6eabcb5df1 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -909,8 +909,10 @@ void j1939_sk_errqueue(struct j1939_session *session, memset(serr, 0, sizeof(*serr)); switch (type) { case J1939_ERRQUEUE_ACK: - if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) { + kfree_skb(skb); return; + } serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; @@ -918,8 +920,10 @@ void j1939_sk_errqueue(struct j1939_session *session, state = "ACK"; break; case J1939_ERRQUEUE_SCHED: - if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) { + kfree_skb(skb); return; + } serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; -- cgit v1.2.3 From 896daf723c845289a4ea1e68e74a5a5475aa796d Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 10 Oct 2019 12:50:31 +0200 Subject: can: j1939: fix memory leak if filters was set Filters array is copied from user space and linked to the j1939 socket. On socket release this memory was not freed.
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 5c6eabcb5df1..4d8ba701e15d 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -580,6 +580,7 @@ static int j1939_sk_release(struct socket *sock) j1939_netdev_stop(priv); } + kfree(jsk->filters); sock_orphan(sk); sock->sk = NULL; -- cgit v1.2.3 From eaa654f164ba9acd5656e6485eeb5e73da8bfc3e Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 25 Oct 2019 15:04:13 +0200 Subject: can: j1939: transport: j1939_session_fresh_new(): make sure EOMA is send with the total message size set We were sending malformed EOMA messages with total message size set to 0. This patch fixes the bug. Reported-by: https://github.com/linux-can/can-utils/issues/159 Signed-off-by: Oleksij Rempel Acked-by: Kurt Van Dijck Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index fe000ea757ea..06183d6f4fb7 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1432,7 +1432,7 @@ j1939_session *j1939_session_fresh_new(struct j1939_priv *priv, skcb = j1939_skb_to_cb(skb); memcpy(skcb, rel_skcb, sizeof(*skcb)); - session = j1939_session_new(priv, skb, skb->len); + session = j1939_session_new(priv, skb, size); if (!session) { kfree_skb(skb); return NULL; -- cgit v1.2.3 From 688d11c38423fd98ab6c8d5fc1976c8f365fc875 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 25 Oct 2019 15:04:13 +0200 Subject: can: j1939: transport: j1939_xtp_rx_eoma_one(): Add sanity check for correct total message size We were sending malformed EOMA with total message size set to 0. This issue has been fixed in the previous patch.
In this patch a sanity check is added to the RX path and an error message is displayed. Signed-off-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 06183d6f4fb7..e5f1a56994c6 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1273,9 +1273,27 @@ j1939_xtp_rx_abort(struct j1939_priv *priv, struct sk_buff *skb, static void j1939_xtp_rx_eoma_one(struct j1939_session *session, struct sk_buff *skb) { + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + const u8 *dat; + int len; + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) return; + dat = skb->data; + + if (skcb->addr.type == J1939_ETP) + len = j1939_etp_ctl_to_size(dat); + else + len = j1939_tp_ctl_to_size(dat); + + if (session->total_message_size != len) { + netdev_warn_once(session->priv->ndev, + "%s: 0x%p: Incorrect size. Expected: %i; got: %i.\n", + __func__, session, session->total_message_size, + len); + } + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); session->pkt.tx_acked = session->pkt.total; -- cgit v1.2.3 From 1f1be49fb67a58460c2216a77d3c5a8a3faa3a98 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 28 Oct 2019 20:15:17 +0200 Subject: drm/i915: Avoid HPD poll detect triggering a new detect cycle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the HPD interrupt functionality the HW depends on power wells in the display core domain to be on. Accordingly when enabling these power wells the HPD polling logic will force an HPD detection cycle to account for hotplug events that may have happened when such a power well was off. Thus a detect cycle started by polling could start a new detect cycle if a power well in the display core domain gets enabled during detect and stays enabled after detect completes. That in turn can lead to a detection cycle runaway.
To prevent re-triggering a poll-detect cycle make sure we drop all power references we acquired during detect synchronously by the end of detect. This will let the poll-detect logic continue with polling (matching the off state of the corresponding power wells) instead of scheduling a new detection cycle. Fixes: 6cfe7ec02e85 ("drm/i915: Remove the unneeded AUX power ref from intel_dp_detect()") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112125 Reported-and-tested-by: Val Kulkov Reported-and-tested-by: wangqr Cc: Val Kulkov Cc: wangqr Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191028181517.22602-1-imre.deak@intel.com (cherry picked from commit a8ddac7c9f06a12227a4f5febd1cbe0575a33179) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_crt.c | 7 +++++++ drivers/gpu/drm/i915/display/intel_dp.c | 6 ++++++ drivers/gpu/drm/i915/display/intel_hdmi.c | 6 ++++++ 3 files changed, 19 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index e6e8d4a82044..0a08354a6183 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -864,6 +864,13 @@ load_detect: out: intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref); + + /* + * Make sure the refs for power wells enabled during detect are + * dropped to avoid a new detect cycle triggered by HPD polling. 
+ */ + intel_display_power_flush_work(dev_priv); + return status; } diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 57e9f0ba331b..2822a9f19844 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5436,6 +5436,12 @@ out: if (status != connector_status_connected && !intel_dp->is_mst) intel_dp_unset_edid(intel_dp); + /* + * Make sure the refs for power wells enabled during detect are + * dropped to avoid a new detect cycle triggered by HPD polling. + */ + intel_display_power_flush_work(dev_priv); + return status; } diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index e02f0faecf02..b030f7ae3302 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -2565,6 +2565,12 @@ out: if (status != connector_status_connected) cec_notifier_phys_addr_invalidate(intel_hdmi->cec_notifier); + /* + * Make sure the refs for power wells enabled during detect are + * dropped to avoid a new detect cycle triggered by HPD polling. + */ + intel_display_power_flush_work(dev_priv); + return status; } -- cgit v1.2.3 From ee2c5ef8a9d640ee1617ec97b84fe2f634284e51 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Mon, 28 Oct 2019 18:10:14 -0700 Subject: drm/i915/dp: Do not switch aux to TBT mode for non-TC ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Non-TC ports always have tc_mode == TC_PORT_TBT_ALT so it was switching aux to TBT mode for all combo-phy ports, happily this did not cause any issue, but it is better to follow BSpec. Also this is a reserved bit before ICL.
Cc: Imre Deak Signed-off-by: José Roberto de Souza Fixes: e9b7e1422d40 ("drm/i915: Sanitize the terminology used for TypeC port modes") Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20191029011014.286885-1-jose.souza@intel.com (cherry picked from commit 49748264826ff4cc7f0ebbdd6b0d1a36b13b1cee) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 2822a9f19844..9b15ac4f2fb6 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1256,6 +1256,9 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp, u32 unused) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *i915 = + to_i915(intel_dig_port->base.base.dev); + enum phy phy = intel_port_to_phy(i915, intel_dig_port->base.port); u32 ret; ret = DP_AUX_CH_CTL_SEND_BUSY | @@ -1268,7 +1271,8 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp, DP_AUX_CH_CTL_FW_SYNC_PULSE_SKL(32) | DP_AUX_CH_CTL_SYNC_PULSE_SKL(32); - if (intel_dig_port->tc_mode == TC_PORT_TBT_ALT) + if (intel_phy_is_tc(i915, phy) && + intel_dig_port->tc_mode == TC_PORT_TBT_ALT) ret |= DP_AUX_CH_CTL_TBT_IO; return ret; -- cgit v1.2.3 From 52338415cf4d4064ae6b8dd972dadbda841da4fa Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 24 Oct 2019 11:28:29 +0800 Subject: timekeeping/vsyscall: Update VDSO data unconditionally The update of the VDSO data is depending on __arch_use_vsyscall() returning True. This is a leftover from the attempt to map the features of various architectures 1:1 into generic code. The usage of __arch_use_vsyscall() in the actual vsyscall implementations got dropped and replaced by the requirement for the architecture code to return U64_MAX if the global clocksource is not usable in the VDSO. 
But the __arch_use_vsyscall() check in the update code stayed which causes the VDSO data to be stale or invalid when an architecture actually implements that function and returns False when the current clocksource is not usable in the VDSO. As a consequence the VDSO implementations of clock_getres(), time(), clock_gettime(CLOCK_.*_COARSE) operate on invalid data and return bogus information. Remove the __arch_use_vsyscall() check from the VDSO update function and update the VDSO data unconditionally. [ tglx: Massaged changelog and removed the now useless implementations in asm-generic/ARM64/MIPS ] Fixes: 44f57d788e7deecb50 ("timekeeping: Provide a generic update_vsyscall() implementation") Signed-off-by: Huacai Chen Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Vincenzo Frascino Cc: Arnd Bergmann Cc: Paul Burton Cc: linux-mips@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1571887709-11447-1-git-send-email-chenhc@lemote.com --- arch/arm64/include/asm/vdso/vsyscall.h | 7 ------- arch/mips/include/asm/vdso/vsyscall.h | 7 ------- include/asm-generic/vdso/vsyscall.h | 7 ------- kernel/time/vsyscall.c | 9 +++------ 4 files changed, 3 insertions(+), 27 deletions(-) diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index 0c731bfc7c8c..0c20a7c1bee5 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -30,13 +30,6 @@ int __arm64_get_clock_mode(struct timekeeper *tk) } #define __arch_get_clock_mode __arm64_get_clock_mode -static __always_inline -int __arm64_use_vsyscall(struct vdso_data *vdata) -{ - return !vdata[CS_HRES_COARSE].clock_mode; -} -#define __arch_use_vsyscall __arm64_use_vsyscall - static __always_inline void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) { diff --git a/arch/mips/include/asm/vdso/vsyscall.h b/arch/mips/include/asm/vdso/vsyscall.h index 
195314732233..00d41b94ba31 100644 --- a/arch/mips/include/asm/vdso/vsyscall.h +++ b/arch/mips/include/asm/vdso/vsyscall.h @@ -28,13 +28,6 @@ int __mips_get_clock_mode(struct timekeeper *tk) } #define __arch_get_clock_mode __mips_get_clock_mode -static __always_inline -int __mips_use_vsyscall(struct vdso_data *vdata) -{ - return (vdata[CS_HRES_COARSE].clock_mode != VDSO_CLOCK_NONE); -} -#define __arch_use_vsyscall __mips_use_vsyscall - /* The asm-generic header needs to be included after the definitions above */ #include diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index e94b19782c92..ce4103208619 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -25,13 +25,6 @@ static __always_inline int __arch_get_clock_mode(struct timekeeper *tk) } #endif /* __arch_get_clock_mode */ -#ifndef __arch_use_vsyscall -static __always_inline int __arch_use_vsyscall(struct vdso_data *vdata) -{ - return 1; -} -#endif /* __arch_use_vsyscall */ - #ifndef __arch_update_vsyscall static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 4bc37ac3bb05..5ee0f7709410 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -110,8 +110,7 @@ void update_vsyscall(struct timekeeper *tk) nsec = nsec + tk->wall_to_monotonic.tv_nsec; vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec); - if (__arch_use_vsyscall(vdata)) - update_vdso_data(vdata, tk); + update_vdso_data(vdata, tk); __arch_update_vsyscall(vdata, tk); @@ -124,10 +123,8 @@ void update_vsyscall_tz(void) { struct vdso_data *vdata = __arch_get_k_vdso_data(); - if (__arch_use_vsyscall(vdata)) { - vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest; - vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime; - } + vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest; + vdata[CS_HRES_COARSE].tz_dsttime = 
sys_tz.tz_dsttime; __arch_sync_vdso_data(vdata); } -- cgit v1.2.3 From fe6f85ca121e9c74e7490fe66b0c5aae38e332c3 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 29 Oct 2019 10:34:19 +0100 Subject: x86/apic/32: Avoid bogus LDR warnings The removal of the LDR initialization in the bigsmp_32 APIC code unearthed a problem in setup_local_APIC(). The code checks unconditionally for a mismatch of the logical APIC id by comparing the early APIC id which was initialized in get_smp_config() with the actual LDR value in the APIC. Due to the removal of the bogus LDR initialization the check now can trigger on bigsmp_32 APIC systems emitting a warning for every booting CPU. This is of course a false positive because the APIC is not using logical destination mode. Restrict the check and the possibly resulting fixup to systems which are actually using the APIC in logical destination mode. [ tglx: Massaged changelog and added Cc stable ] Fixes: bae3a8d3308 ("x86/apic: Do not initialize LDR and DFR for bigsmp") Signed-off-by: Jan Beulich Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/666d8f91-b5a8-1afd-7add-821e72a35f03@suse.com --- arch/x86/kernel/apic/apic.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 9e2dd2b296cd..2b0faf86da1b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1586,9 +1586,6 @@ static void setup_local_APIC(void) { int cpu = smp_processor_id(); unsigned int value; -#ifdef CONFIG_X86_32 - int logical_apicid, ldr_apicid; -#endif if (disable_apic) { disable_ioapic_support(); @@ -1626,16 +1623,21 @@ static void setup_local_APIC(void) apic->init_apic_ldr(); #ifdef CONFIG_X86_32 - /* - * APIC LDR is initialized. If logical_apicid mapping was - * initialized during get_smp_config(), make sure it matches the - * actual value. 
- */ - logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); - ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); - WARN_ON(logical_apicid != BAD_APICID && logical_apicid != ldr_apicid); - /* always use the value from LDR */ - early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; + if (apic->dest_logical) { + int logical_apicid, ldr_apicid; + + /* + * APIC LDR is initialized. If logical_apicid mapping was + * initialized during get_smp_config(), make sure it matches + * the actual value. + */ + logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); + ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); + if (logical_apicid != BAD_APICID) + WARN_ON(logical_apicid != ldr_apicid); + /* Always use the value from LDR. */ + early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; + } #endif /* -- cgit v1.2.3 From 0ed9ca25894ef673d0259e4bd312d5fa1b9a6591 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Sat, 19 Oct 2019 17:07:27 +0800 Subject: irq/irqdomain: Update __irq_domain_alloc_fwnode() function documentation A recent commit changed a parameter of __irq_domain_alloc_fwnode(), but did not update the documentation comment. Fix it up. Fixes: b977fcf477c1 ("irqdomain/debugfs: Use PAs to generate fwnode names") Signed-off-by: Yi Wang Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/1571476047-29463-1-git-send-email-wang.yi59@zte.com.cn --- kernel/irq/irqdomain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 132672b74e4b..dd822fd8a7d5 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -51,7 +51,7 @@ EXPORT_SYMBOL_GPL(irqchip_fwnode_ops); * @type: Type of irqchip_fwnode. 
See linux/irqdomain.h * @name: Optional user provided domain name * @id: Optional user provided id if name != NULL - * @data: Optional user-provided data + * @pa: Optional user-provided physical address * * Allocate a struct irqchip_fwid, and return a poiner to the embedded * fwnode_handle (or NULL on failure). -- cgit v1.2.3 From e361362b08cab1098b64b0e5fd8c879f086b3f46 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Oct 2019 20:05:49 +0200 Subject: x86/dumpstack/64: Don't evaluate exception stacks before setup Cyrill reported the following crash: BUG: unable to handle page fault for address: 0000000000001ff0 #PF: supervisor read access in kernel mode RIP: 0010:get_stack_info+0xb3/0x148 It turns out that if the stack tracer is invoked before the exception stack mappings are initialized in_exception_stack() can erroneously classify an invalid address as an address inside of an exception stack: begin = this_cpu_read(cea_exception_stacks); <- 0 end = begin + sizeof(exception stacks); i.e. any address between 0 and end will be considered as exception stack address and the subsequent code will then try to dereference the resulting stack frame at a non mapped address. end = begin + (unsigned long)ep->size; ==> end = 0x2000 regs = (struct pt_regs *)end - 1; ==> regs = 0x2000 - sizeof(struct pt_regs *) = 0x1ff0 info->next_sp = (unsigned long *)regs->sp; ==> Crashes due to accessing 0x1ff0 Prevent this by checking the validity of the cea_exception_stack base address and bailing out if it is zero.
Fixes: afcd21dad88b ("x86/dumpstack/64: Use cpu_entry_area instead of orig_ist") Reported-by: Cyrill Gorcunov Signed-off-by: Thomas Gleixner Tested-by: Cyrill Gorcunov Acked-by: Josh Poimboeuf Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1910231950590.1852@nanos.tec.linutronix.de --- arch/x86/kernel/dumpstack_64.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 753b8cfe8b8a..87b97897a881 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -94,6 +94,13 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info) BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); begin = (unsigned long)__this_cpu_read(cea_exception_stacks); + /* + * Handle the case where stack trace is collected _before_ + * cea_exception_stacks had been initialized. + */ + if (!begin) + return false; + end = begin + sizeof(struct cea_exception_stacks); /* Bail if @stack is outside the exception stack area. */ if (stk < begin || stk >= end) -- cgit v1.2.3 From 63ec58b44fcc05efd1542045abd7faf056ac27d9 Mon Sep 17 00:00:00 2001 From: Michael Zhivich Date: Thu, 24 Oct 2019 13:59:45 -0400 Subject: x86/tsc: Respect tsc command line parameter for clocksource_tsc_early The introduction of clocksource_tsc_early broke the functionality of "tsc=reliable" and "tsc=nowatchdog" command line parameters, since clocksource_tsc_early is unconditionally registered with CLOCK_SOURCE_MUST_VERIFY and thus put on the watchdog list.
This can cause the TSC to be declared unstable during boot: clocksource: timekeeping watchdog on CPU0: Marking clocksource 'tsc-early' as unstable because the skew is too large: clocksource: 'refined-jiffies' wd_now: fffb7018 wd_last: fffb6e9d mask: ffffffff clocksource: 'tsc-early' cs_now: 68a6a7070f6a0 cs_last: 68a69ab6f74d6 mask: ffffffffffffffff tsc: Marking TSC unstable due to clocksource watchdog The corresponding elapsed times are cs_nsec=1224152026 and wd_nsec=378942392, so the watchdog differs from TSC by 0.84 seconds. This happens when HPET is not available and jiffies are used as the TSC watchdog instead and the jiffies update is not happening due to lost timer interrupts in periodic mode, which can happen e.g. with expensive debug mechanisms enabled or under massive overload conditions in virtualized environments. Before the introduction of the early TSC clocksource the command line parameters "tsc=reliable" and "tsc=nowatchdog" could be used to work around this issue. Restore the behaviour by disabling the watchdog if requested on the kernel command line. 
[ tglx: Clarify changelog ] Fixes: aa83c45762a24 ("x86/tsc: Introduce early tsc clocksource") Signed-off-by: Michael Zhivich Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20191024175945.14338-1-mzhivich@akamai.com --- arch/x86/kernel/tsc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c59454c382fd..7e322e2daaf5 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1505,6 +1505,9 @@ void __init tsc_init(void) return; } + if (tsc_clocksource_reliable || no_tsc_watchdog) + clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY; + clocksource_register_khz(&clocksource_tsc_early, tsc_khz); detect_art(); } -- cgit v1.2.3 From 443b0636ea7386d01dc460b4a4264e125f710b53 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 17 Oct 2019 17:05:22 -0400 Subject: perf scripting engines: Iterate on tep event arrays directly Instead of calling a useless (and broken) helper function to get the next event of a tep event array, just get the array directly and iterate over it. Note, the broken part was from trace_find_next_event() which after this will no longer be used, and can be removed. Committer notes: This fixes a segfault when generating python scripts from perf.data files with multiple tracepoint events, i.e. 
the following use case is fixed by this patch: # perf record -e sched:* sleep 1 [ perf record: Woken up 31 times to write data ] [ perf record: Captured and wrote 0.031 MB perf.data (9 samples) ] # perf script -g python Segmentation fault (core dumped) # Reported-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (VMware) Tested-by: Arnaldo Carvalho de Melo Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tzvetomir Stoyanov Cc: linux-trace-devel@vger.kernel.org Link: http://lkml.kernel.org/r/20191017153733.630cd5eb@gandalf.local.home Link: http://lore.kernel.org/lkml/20191017210636.061448713@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-perl.c | 8 ++++++-- tools/perf/util/scripting-engines/trace-event-python.c | 9 +++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 15961854ba67..741f040648b5 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -539,10 +539,11 @@ static int perl_stop_script(void) static int perl_generate_script(struct tep_handle *pevent, const char *outfile) { + int i, not_first, count, nr_events; + struct tep_event **all_events; struct tep_event *event = NULL; struct tep_format_field *f; char fname[PATH_MAX]; - int not_first, count; FILE *ofp; sprintf(fname, "%s.pl", outfile); @@ -603,8 +604,11 @@ sub print_backtrace\n\ }\n\n\ "); + nr_events = tep_get_events_count(pevent); + all_events = tep_list_events(pevent, TEP_EVENT_SORT_ID); - while ((event = trace_find_next_event(pevent, event))) { + for (i = 0; all_events && i < nr_events; i++) { + event = all_events[i]; fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name); fprintf(ofp, "\tmy ("); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c 
index 5d341efc3237..93c03b39cd9c 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1687,10 +1687,11 @@ static int python_stop_script(void) static int python_generate_script(struct tep_handle *pevent, const char *outfile) { + int i, not_first, count, nr_events; + struct tep_event **all_events; struct tep_event *event = NULL; struct tep_format_field *f; char fname[PATH_MAX]; - int not_first, count; FILE *ofp; sprintf(fname, "%s.py", outfile); @@ -1735,7 +1736,11 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile fprintf(ofp, "def trace_end():\n"); fprintf(ofp, "\tprint(\"in trace_end\")\n\n"); - while ((event = trace_find_next_event(pevent, event))) { + nr_events = tep_get_events_count(pevent); + all_events = tep_list_events(pevent, TEP_EVENT_SORT_ID); + + for (i = 0; all_events && i < nr_events; i++) { + event = all_events[i]; fprintf(ofp, "def %s__%s(", event->system, event->name); fprintf(ofp, "event_name, "); fprintf(ofp, "context, "); -- cgit v1.2.3 From 6047e1a81e9fe9851ed37e13c2438312c04435d9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 17 Oct 2019 17:05:23 -0400 Subject: perf tools: Remove unused trace_find_next_event() trace_find_next_event() was buggy and pretty much a useless helper. As there are no more users, just remove it. 
Signed-off-by: Steven Rostedt (VMware) Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tzvetomir Stoyanov Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/lkml/20191017210636.224045576@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/trace-event-parse.c | 31 ------------------------------- tools/perf/util/trace-event.h | 2 -- 2 files changed, 33 deletions(-) diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 5d6bfc70b210..9634f0ae57be 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -173,37 +173,6 @@ int parse_event_file(struct tep_handle *pevent, return tep_parse_event(pevent, buf, size, sys); } -struct tep_event *trace_find_next_event(struct tep_handle *pevent, - struct tep_event *event) -{ - static int idx; - int events_count; - struct tep_event *all_events; - - all_events = tep_get_first_event(pevent); - events_count = tep_get_events_count(pevent); - if (!pevent || !all_events || events_count < 1) - return NULL; - - if (!event) { - idx = 0; - return all_events; - } - - if (idx < events_count && event == (all_events + idx)) { - idx++; - if (idx == events_count) - return NULL; - return (all_events + idx); - } - - for (idx = 1; idx < events_count; idx++) { - if (event == (all_events + (idx - 1))) - return (all_events + idx); - } - return NULL; -} - struct flag { const char *name; unsigned long long value; diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 2e158387b3d7..72fdf2a3577c 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -47,8 +47,6 @@ void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int siz ssize_t trace_report(int fd, struct trace_event *tevent, bool repipe); -struct tep_event *trace_find_next_event(struct tep_handle *pevent, - struct tep_event *event); unsigned long long read_size(struct tep_event *event, void *ptr, int size); unsigned 
long long eval_flag(const char *flag); -- cgit v1.2.3 From 27a0e54bae09d2dd023a01254db506d61cc50ba1 Mon Sep 17 00:00:00 2001 From: Timo Schlüßler Date: Fri, 11 Oct 2019 15:38:19 +0200 Subject: can: mcp251x: mcp251x_restart_work_handler(): Fix potential force_quit race condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In mcp251x_restart_work_handler() the variable to stop the interrupt handler (priv->force_quit) is reset after the chip is restarted and thus a interrupt might occur. This patch fixes the potential race condition by resetting force_quit before enabling interrupts. Signed-off-by: Timo Schlüßler Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index bee9f7b8dad6..bb20a9b75cc6 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -717,6 +717,7 @@ static void mcp251x_restart_work_handler(struct work_struct *ws) if (priv->after_suspend) { mcp251x_hw_reset(spi); mcp251x_setup(net, spi); + priv->force_quit = 0; if (priv->after_suspend & AFTER_SUSPEND_RESTART) { mcp251x_set_normal_mode(spi); } else if (priv->after_suspend & AFTER_SUSPEND_UP) { @@ -728,7 +729,6 @@ static void mcp251x_restart_work_handler(struct work_struct *ws) mcp251x_hw_sleep(spi); } priv->after_suspend = 0; - priv->force_quit = 0; } if (priv->restart_tx) { -- cgit v1.2.3 From 3926a3a025d443f6b7a58a2c0c33e7d77c1ca935 Mon Sep 17 00:00:00 2001 From: Yegor Yefremov Date: Thu, 19 Sep 2019 15:53:04 +0200 Subject: can: don't use deprecated license identifiers The "GPL-2.0" license identifier changed to "GPL-2.0-only" in SPDX v3.0. 
Signed-off-by: Yegor Yefremov Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can.h | 2 +- include/uapi/linux/can/bcm.h | 2 +- include/uapi/linux/can/error.h | 2 +- include/uapi/linux/can/gw.h | 2 +- include/uapi/linux/can/j1939.h | 2 +- include/uapi/linux/can/netlink.h | 2 +- include/uapi/linux/can/raw.h | 2 +- include/uapi/linux/can/vxcan.h | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 1e988fdeba34..6a6d2c7655ff 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */ /* * linux/can.h * diff --git a/include/uapi/linux/can/bcm.h b/include/uapi/linux/can/bcm.h index 0fb328d93148..dd2b925b09ac 100644 --- a/include/uapi/linux/can/bcm.h +++ b/include/uapi/linux/can/bcm.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */ /* * linux/can/bcm.h * diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h index bfc4b5d22a5e..34633283de64 100644 --- a/include/uapi/linux/can/error.h +++ b/include/uapi/linux/can/error.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */ /* * linux/can/error.h * diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h index 3aea5388c8e4..c2190bbe21d8 100644 --- a/include/uapi/linux/can/gw.h +++ b/include/uapi/linux/can/gw.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */ /* * linux/can/gw.h * 
diff --git a/include/uapi/linux/can/j1939.h b/include/uapi/linux/can/j1939.h index c32325342d30..df6e821075c1 100644 --- a/include/uapi/linux/can/j1939.h +++ b/include/uapi/linux/can/j1939.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ /* * j1939.h * diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index 1bc70d3a4d39..6f598b73839e 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ /* * linux/can/netlink.h * diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h index be3b36e7ff61..6a11d308eb5c 100644 --- a/include/uapi/linux/can/raw.h +++ b/include/uapi/linux/can/raw.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */ /* * linux/can/raw.h * diff --git a/include/uapi/linux/can/vxcan.h b/include/uapi/linux/can/vxcan.h index 066812d118a2..4fa9d8777a07 100644 --- a/include/uapi/linux/can/vxcan.h +++ b/include/uapi/linux/can/vxcan.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ #ifndef _UAPI_CAN_VXCAN_H #define _UAPI_CAN_VXCAN_H -- cgit v1.2.3 From 722ddfde366fd46205456a9c5ff9b3359dc9a75e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 5 Nov 2019 00:27:11 +0100 Subject: perf tools: Fix time sorting The final sort might get confused when the comparison is done over bigger numbers than int like for -s time. Check the following report for longer workloads: $ perf report -s time -F time,overhead --stdio Fix hist_entry__sort() to properly return int64_t and not possible cut int. 
Fixes: 043ca389a318 ("perf tools: Use hpp formats to sort final output") Signed-off-by: Jiri Olsa Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@vger.kernel.org # v3.16+ Link: http://lore.kernel.org/lkml/20191104232711.16055-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 679a1d75090c..7b6eaf5e0bda 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1625,7 +1625,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) return 0; } -static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b) +static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b) { struct hists *hists = a->hists; struct perf_hpp_fmt *fmt; -- cgit v1.2.3 From 57baec7b1b0459ef885e816d8c28a9d9a62bb8de Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Tue, 5 Nov 2019 11:10:23 +0100 Subject: scripts/nsdeps: make sure to pass all module source files to spatch The nsdeps script passes a list of the module source files to generate_deps_for_ns() as a space delimited string named $mod_source_files, which then passes it to spatch. But since $mod_source_files is not encased in quotes, each source file in that string is treated as a separate shell function argument (as $2, $3, $4, etc.). However, the spatch invocation only refers to $2, so only the first file out of $mod_source_files is processed by spatch. This causes problems (namely, the MODULE_IMPORT_NS() statement doesn't get inserted) when a module is composed of many source files and the "main" module file containing the MODULE_LICENSE() statement is not the first file listed in $mod_source_files. Fix this by encasing $mod_source_files in quotes so that the entirety of the string is treated as a single argument and can be referred to as $2. 
In addition, put quotes in the variable assignment of mod_source_files to prevent any shell interpretation and field splitting. Reviewed-by: Masahiro Yamada Acked-by: Matthias Maennich Signed-off-by: Jessica Yu --- scripts/nsdeps | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/nsdeps b/scripts/nsdeps index dda6fbac016e..04cea0921673 100644 --- a/scripts/nsdeps +++ b/scripts/nsdeps @@ -31,12 +31,12 @@ generate_deps() { local mod_file=`echo $@ | sed -e 's/\.ko/\.mod/'` local ns_deps_file=`echo $@ | sed -e 's/\.ko/\.ns_deps/'` if [ ! -f "$ns_deps_file" ]; then return; fi - local mod_source_files=`cat $mod_file | sed -n 1p \ + local mod_source_files="`cat $mod_file | sed -n 1p \ | sed -e 's/\.o/\.c/g' \ - | sed "s|[^ ]* *|${srctree}/&|g"` + | sed "s|[^ ]* *|${srctree}/&|g"`" for ns in `cat $ns_deps_file`; do echo "Adding namespace $ns to module $mod_name (if needed)." - generate_deps_for_ns $ns $mod_source_files + generate_deps_for_ns $ns "$mod_source_files" # sort the imports for source_file in $mod_source_files; do sed '/MODULE_IMPORT_NS/Q' $source_file > ${source_file}.tmp -- cgit v1.2.3 From 15c2b3cc09a31620914955cb2a89c277c18ee999 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 5 Nov 2019 14:43:16 +0100 Subject: ALSA: hda/ca0132 - Fix possible workqueue stall The unsolicited event handler for the headphone jack on CA0132 codec driver tries to reschedule the another delayed work with cancel_delayed_work_sync(). It's no good idea, unfortunately, especially after we changed the work queue to the standard global one; this may lead to a stall because both works are using the same global queue. Fix it by dropping the _sync but does call cancel_delayed_work() instead. 
Fixes: 993884f6a26c ("ALSA: hda/ca0132 - Delay HP amp turnon.") BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1155836 Cc: Link: https://lore.kernel.org/r/20191105134316.19294-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 6d1fb7c11f17..b7a1abb3e231 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -7604,7 +7604,7 @@ static void hp_callback(struct hda_codec *codec, struct hda_jack_callback *cb) /* Delay enabling the HP amp, to let the mic-detection * state machine run. */ - cancel_delayed_work_sync(&spec->unsol_hp_work); + cancel_delayed_work(&spec->unsol_hp_work); schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500)); tbl = snd_hda_jack_tbl_get(codec, cb->nid); if (tbl) -- cgit v1.2.3 From 5bb5e6ee6f5c557dcd19822eccd7bcced1e1a410 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Oct 2019 12:15:10 -0400 Subject: ceph: don't try to handle hashed dentries in non-O_CREAT atomic_open If ceph_atomic_open is handed a !d_in_lookup dentry, then that means that it already passed d_revalidate so we *know* that it's negative (or at least was very recently). Just return -ENOENT in that case. This also addresses a subtle bug in dentry handling. Non-O_CREAT opens call atomic_open with the parent's i_rwsem shared, but calling d_splice_alias on a hashed dentry requires the exclusive lock. If ceph_atomic_open receives a hashed, negative dentry on a non-O_CREAT open, and another client were to race in and create the file before we issue our OPEN, ceph_fill_trace could end up calling d_splice_alias on the dentry with the new inode with insufficient locks. 
Cc: stable@vger.kernel.org Reported-by: Al Viro Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index d277f71abe0b..d2854cd2f4f5 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -462,6 +462,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, err = ceph_security_init_secctx(dentry, mode, &as_ctx); if (err < 0) goto out_ctx; + } else if (!d_in_lookup(dentry)) { + /* If it's not being looked up, it's negative */ + return -ENOENT; } /* do the open */ -- cgit v1.2.3 From a3a0819388b2bf15e7eafe38ff6aacfc27b12df0 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Thu, 31 Oct 2019 11:49:39 +0000 Subject: ceph: don't allow copy_file_range when stripe_count != 1 copy_file_range tries to use the OSD 'copy-from' operation, which simply performs a full object copy. Unfortunately, the implementation of this system call assumes that stripe_count is always set to 1 and doesn't take into account that the data may be striped across an object set. If the file layout has stripe_count different from 1, then the destination file data will be corrupted. For example: Consider a 8 MiB file with 4 MiB object size, stripe_count of 2 and stripe_size of 2 MiB; the first half of the file will be filled with 'A's and the second half will be filled with 'B's: 0 4M 8M Obj1 Obj2 +------+------+ +----+ +----+ file: | AAAA | BBBB | | AA | | AA | +------+------+ |----| |----| | BB | | BB | +----+ +----+ If we copy_file_range this file into a new file (which needs to have the same file layout!), then it will start by copying the object starting at file offset 0 (Obj1). And then it will copy the object starting at file offset 4M -- which is Obj1 again. Unfortunately, the solution for this is to not allow remote object copies to be performed when the file layout stripe_count is not 1 and simply fallback to the default (VFS) copy_file_range implementation. 
Cc: stable@vger.kernel.org Signed-off-by: Luis Henriques Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index d2854cd2f4f5..bd77adb64bfd 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1959,10 +1959,18 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, if (ceph_test_mount_opt(src_fsc, NOCOPYFROM)) return -EOPNOTSUPP; + /* + * Striped file layouts require that we copy partial objects, but the + * OSD copy-from operation only supports full-object copies. Limit + * this to non-striped file layouts for now. + */ if ((src_ci->i_layout.stripe_unit != dst_ci->i_layout.stripe_unit) || - (src_ci->i_layout.stripe_count != dst_ci->i_layout.stripe_count) || - (src_ci->i_layout.object_size != dst_ci->i_layout.object_size)) + (src_ci->i_layout.stripe_count != 1) || + (dst_ci->i_layout.stripe_count != 1) || + (src_ci->i_layout.object_size != dst_ci->i_layout.object_size)) { + dout("Invalid src/dst files layout\n"); return -EOPNOTSUPP; + } if (len < src_ci->i_layout.object_size) return -EOPNOTSUPP; /* no remote copy will be done */ -- cgit v1.2.3 From fa729c4df558936b4a1a7b3e2234011f44ede28b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 31 Oct 2019 12:36:08 +0100 Subject: clone3: validate stack arguments Validate the stack arguments and set up the stack depending on whether or not it is growing down or up. Legacy clone() required userspace to know in which direction the stack is growing and pass down the stack pointer appropriately. To make things more confusing microblaze uses a variant of the clone() syscall selected by CONFIG_CLONE_BACKWARDS3 that takes an additional stack_size argument. IA64 has a separate clone2() syscall which also takes an additional stack_size argument. Finally, parisc has a stack that is growing upwards.
Userspace therefore has a lot nasty code like the following: #define __STACK_SIZE (8 * 1024 * 1024) pid_t sys_clone(int (*fn)(void *), void *arg, int flags, int *pidfd) { pid_t ret; void *stack; stack = malloc(__STACK_SIZE); if (!stack) return -ENOMEM; #ifdef __ia64__ ret = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, pidfd); #elif defined(__parisc__) /* stack grows up */ ret = clone(fn, stack, flags | SIGCHLD, arg, pidfd); #else ret = clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, pidfd); #endif return ret; } or even crazier variants such as [3]. With clone3() we have the ability to validate the stack. We can check that when stack_size is passed, the stack pointer is valid and the other way around. We can also check that the memory area userspace gave us is fine to use via access_ok(). Furthermore, we probably should not require userspace to know in which direction the stack is growing. It is easy for us to do this in the kernel and I couldn't find the original reasoning behind exposing this detail to userspace. /* Intentional user visible API change */ clone3() was released with 5.3. Currently, it is not documented and very unclear to userspace how the stack and stack_size argument have to be passed. After talking to glibc folks we concluded that trying to change clone3() to setup the stack instead of requiring userspace to do this is the right course of action. Note, that this is an explicit change in user visible behavior we introduce with this patch. If it breaks someone's use-case we will revert! (And then e.g. place the new behavior under an appropriate flag.) Breaking someone's use-case is very unlikely though. First, neither glibc nor musl currently expose a wrapper for clone3(). Second, there is no real motivation for anyone to use clone3() directly since it does not provide features that legacy clone doesn't. 
New features for clone3() will first happen in v5.5 which is why v5.4 is still a good time to try and make that change now and backport it to v5.3. Searches on [4] did not reveal any packages calling clone3(). [1]: https://lore.kernel.org/r/CAG48ez3q=BeNcuVTKBN79kJui4vC6nw0Bfq6xc-i0neheT17TA@mail.gmail.com [2]: https://lore.kernel.org/r/20191028172143.4vnnjpdljfnexaq5@wittgenstein [3]: https://github.com/systemd/systemd/blob/5238e9575906297608ff802a27e2ff9effa3b338/src/basic/raw-clone.h#L31 [4]: https://codesearch.debian.net Fixes: 7f192e3cd316 ("fork: add clone3") Cc: Kees Cook Cc: Jann Horn Cc: David Howells Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Linus Torvalds Cc: Florian Weimer Cc: Peter Zijlstra Cc: linux-api@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: # 5.3 Cc: GNU C Library Signed-off-by: Christian Brauner Acked-by: Arnd Bergmann Acked-by: Aleksa Sarai Link: https://lore.kernel.org/r/20191031113608.20713-1-christian.brauner@ubuntu.com --- include/uapi/linux/sched.h | 4 ++++ kernel/fork.c | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index 99335e1f4a27..25b4fa00bad1 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -51,6 +51,10 @@ * sent when the child exits. * @stack: Specify the location of the stack for the * child process. + * Note, @stack is expected to point to the + * lowest address. The stack direction will be + * determined by the kernel and set up + * appropriately based on @stack_size. * @stack_size: The size of the stack for the child process. * @tls: If CLONE_SETTLS is set, the tls descriptor * is set to tls. 
diff --git a/kernel/fork.c b/kernel/fork.c index bcdf53125210..55af6931c6ec 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2561,7 +2561,35 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, return 0; } -static bool clone3_args_valid(const struct kernel_clone_args *kargs) +/** + * clone3_stack_valid - check and prepare stack + * @kargs: kernel clone args + * + * Verify that the stack arguments userspace gave us are sane. + * In addition, set the stack direction for userspace since it's easy for us to + * determine. + */ +static inline bool clone3_stack_valid(struct kernel_clone_args *kargs) +{ + if (kargs->stack == 0) { + if (kargs->stack_size > 0) + return false; + } else { + if (kargs->stack_size == 0) + return false; + + if (!access_ok((void __user *)kargs->stack, kargs->stack_size)) + return false; + +#if !defined(CONFIG_STACK_GROWSUP) && !defined(CONFIG_IA64) + kargs->stack += kargs->stack_size; +#endif + } + + return true; +} + +static bool clone3_args_valid(struct kernel_clone_args *kargs) { /* * All lower bits of the flag word are taken. @@ -2581,6 +2609,9 @@ static bool clone3_args_valid(const struct kernel_clone_args *kargs) kargs->exit_signal) return false; + if (!clone3_stack_valid(kargs)) + return false; + return true; } -- cgit v1.2.3 From 9ad9e8d6ca29c1446d81c6518ae634a2141dfd22 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 29 Oct 2019 16:42:27 +0200 Subject: nvme-rdma: fix a segmentation fault during module unload In case there are controllers that are not associated with any RDMA device (e.g. during unsuccessful reconnection) and the user will unload the module, these controllers will not be freed and will access already freed memory. The same logic appears in other fabric drivers as well. 
Fixes: 87fd125344d6 ("nvme-rdma: remove redundant reference between ib_device and tagset") Reviewed-by: Sagi Grimberg Signed-off-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index f19a28b4e997..cb4c3000a57e 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2133,8 +2133,16 @@ err_unreg_client: static void __exit nvme_rdma_cleanup_module(void) { + struct nvme_rdma_ctrl *ctrl; + nvmf_unregister_transport(&nvme_rdma_transport); ib_unregister_client(&nvme_rdma_ib_client); + + mutex_lock(&nvme_rdma_ctrl_mutex); + list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) + nvme_delete_ctrl(&ctrl->ctrl); + mutex_unlock(&nvme_rdma_ctrl_mutex); + flush_workqueue(nvme_delete_wq); } module_init(nvme_rdma_init_module); -- cgit v1.2.3 From 763303a83a095a88c3a8a0d1abf97165db2e8bf5 Mon Sep 17 00:00:00 2001 From: Anton Eidelman Date: Fri, 1 Nov 2019 17:27:55 -0700 Subject: nvme-multipath: fix crash in nvme_mpath_clear_ctrl_paths nvme_mpath_clear_ctrl_paths() iterates through the ctrl->namespaces list while holding ctrl->scan_lock. This does not seem to be the correct way of protecting from concurrent list modification. Specifically, nvme_scan_work() sorts ctrl->namespaces AFTER unlocking scan_lock. This may result in the following (rare) crash in ctrl disconnect during scan_work: BUG: kernel NULL pointer dereference, address: 0000000000000050 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 3995 Comm: nvme 5.3.5-050305-generic RIP: 0010:nvme_mpath_clear_current_path+0xe/0x90 [nvme_core] ... 
Call Trace: nvme_mpath_clear_ctrl_paths+0x3c/0x70 [nvme_core] nvme_remove_namespaces+0x35/0xe0 [nvme_core] nvme_do_delete_ctrl+0x47/0x90 [nvme_core] nvme_sysfs_delete+0x49/0x60 [nvme_core] dev_attr_store+0x17/0x30 sysfs_kf_write+0x3e/0x50 kernfs_fop_write+0x11e/0x1a0 __vfs_write+0x1b/0x40 vfs_write+0xb9/0x1a0 ksys_write+0x67/0xe0 __x64_sys_write+0x1a/0x20 do_syscall_64+0x5a/0x130 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f8d02bfb154 Fix: After taking scan_lock in nvme_mpath_clear_ctrl_paths() down_read(&ctrl->namespaces_rwsem) as well to make list traversal safe. This will not cause deadlocks because taking scan_lock never happens while holding the namespaces_rwsem. Moreover, scan work downs namespaces_rwsem in the same order. Alternative: sort ctrl->namespaces in nvme_scan_work() while still holding the scan_lock. This would leave nvme_mpath_clear_ctrl_paths() without correct protection against ctrl->namespaces modification by anyone other than scan_work. Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Anton Eidelman Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index fc99a40c1ec4..e0f064dcbd02 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -158,9 +158,11 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) struct nvme_ns *ns; mutex_lock(&ctrl->scan_lock); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) if (nvme_mpath_clear_current_path(ns)) kblockd_schedule_work(&ns->head->requeue_work); + up_read(&ctrl->namespaces_rwsem); mutex_unlock(&ctrl->scan_lock); } -- cgit v1.2.3 From c58e81341aecf79c8ce6ea79fee31b3193cd6e78 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 8 Oct 2019 09:36:16 +0200 Subject: watchdog: cpwd: fix build regression The compat_ptr_ioctl() infrastructure did not make it into linux-5.4, so cpwd 
now fails to build. Fix it by using an open-coded version. Fixes: 68f28b01fb9e ("watchdog: cpwd: use generic compat_ptr_ioctl") Signed-off-by: Arnd Bergmann Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/cpwd.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c index 9393be584e72..808eeb4779e4 100644 --- a/drivers/watchdog/cpwd.c +++ b/drivers/watchdog/cpwd.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -473,6 +474,11 @@ static long cpwd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return 0; } +static long cpwd_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return cpwd_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); +} + static ssize_t cpwd_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -497,7 +503,7 @@ static ssize_t cpwd_read(struct file *file, char __user *buffer, static const struct file_operations cpwd_fops = { .owner = THIS_MODULE, .unlocked_ioctl = cpwd_ioctl, - .compat_ioctl = compat_ptr_ioctl, + .compat_ioctl = cpwd_compat_ioctl, .open = cpwd_open, .write = cpwd_write, .read = cpwd_read, -- cgit v1.2.3 From 1993f1d7ca3f315e0459c58c8e7038039a96dd85 Mon Sep 17 00:00:00 2001 From: Jorge Ramirez-Ortiz Date: Fri, 6 Sep 2019 22:30:53 +0200 Subject: watchdog: pm8916_wdt: fix pretimeout registration flow When an IRQ is present in the dts, the probe function shall fail if the interrupt can not be registered. The probe function shall also be retried if getting the irq is being deferred. 
Signed-off-by: Jorge Ramirez-Ortiz Reviewed-by: Loic Poulain Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/pm8916_wdt.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/watchdog/pm8916_wdt.c b/drivers/watchdog/pm8916_wdt.c index 2d3652004e39..1213179f863c 100644 --- a/drivers/watchdog/pm8916_wdt.c +++ b/drivers/watchdog/pm8916_wdt.c @@ -163,9 +163,17 @@ static int pm8916_wdt_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq > 0) { - if (devm_request_irq(dev, irq, pm8916_wdt_isr, 0, "pm8916_wdt", - wdt)) - irq = 0; + err = devm_request_irq(dev, irq, pm8916_wdt_isr, 0, + "pm8916_wdt", wdt); + if (err) + return err; + + wdt->wdev.info = &pm8916_wdt_pt_ident; + } else { + if (irq == -EPROBE_DEFER) + return -EPROBE_DEFER; + + wdt->wdev.info = &pm8916_wdt_ident; } /* Configure watchdog to hard-reset mode */ @@ -177,7 +185,6 @@ static int pm8916_wdt_probe(struct platform_device *pdev) return err; } - wdt->wdev.info = (irq > 0) ? &pm8916_wdt_pt_ident : &pm8916_wdt_ident, wdt->wdev.ops = &pm8916_wdt_ops, wdt->wdev.parent = dev; wdt->wdev.min_timeout = PM8916_WDT_MIN_TIMEOUT; -- cgit v1.2.3 From 2c77734642d52448aca673e889b39f981110828b Mon Sep 17 00:00:00 2001 From: Xingyu Chen Date: Sun, 29 Sep 2019 18:53:49 +0800 Subject: watchdog: meson: Fix the wrong value of left time The left time value is wrong when we get it by sysfs. The left time value should be equal to preset timeout value minus elapsed time value. According to the Meson-GXB/GXL datasheets which can be found at [0], the timeout value is saved to BIT[0-15] of the WATCHDOG_TCNT, and elapsed time value is saved to BIT[16-31] of the WATCHDOG_TCNT. 
[0]: http://linux-meson.com Fixes: 683fa50f0e18 ("watchdog: Add Meson GXBB Watchdog Driver") Signed-off-by: Xingyu Chen Acked-by: Neil Armstrong Reviewed-by: Kevin Hilman Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/meson_gxbb_wdt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/meson_gxbb_wdt.c b/drivers/watchdog/meson_gxbb_wdt.c index d17c1a6ed723..5a9ca10fbcfa 100644 --- a/drivers/watchdog/meson_gxbb_wdt.c +++ b/drivers/watchdog/meson_gxbb_wdt.c @@ -89,8 +89,8 @@ static unsigned int meson_gxbb_wdt_get_timeleft(struct watchdog_device *wdt_dev) reg = readl(data->reg_base + GXBB_WDT_TCNT_REG); - return ((reg >> GXBB_WDT_TCNT_CNT_SHIFT) - - (reg & GXBB_WDT_TCNT_SETUP_MASK)) / 1000; + return ((reg & GXBB_WDT_TCNT_SETUP_MASK) - + (reg >> GXBB_WDT_TCNT_CNT_SHIFT)) / 1000; } static const struct watchdog_ops meson_gxbb_wdt_ops = { -- cgit v1.2.3 From 2c50a6b825b3463a7600d6e6acadba73211c3d2c Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Wed, 9 Oct 2019 15:37:47 +0800 Subject: watchdog: imx_sc_wdt: Pretimeout should follow SCU firmware format SCU firmware calculates pretimeout based on current time stamp instead of watchdog timeout stamp, need to convert the pretimeout to SCU firmware's timeout value. 
Fixes: 15f7d7fc5542 ("watchdog: imx_sc: Add pretimeout support") Signed-off-by: Anson Huang Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/imx_sc_wdt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/imx_sc_wdt.c b/drivers/watchdog/imx_sc_wdt.c index 7ea5cf54e94a..8ed89f032ebf 100644 --- a/drivers/watchdog/imx_sc_wdt.c +++ b/drivers/watchdog/imx_sc_wdt.c @@ -99,8 +99,14 @@ static int imx_sc_wdt_set_pretimeout(struct watchdog_device *wdog, { struct arm_smccc_res res; + /* + * SCU firmware calculates pretimeout based on current time + * stamp instead of watchdog timeout stamp, need to convert + * the pretimeout to SCU firmware's timeout value. + */ arm_smccc_smc(IMX_SIP_TIMER, IMX_SIP_TIMER_SET_PRETIME_WDOG, - pretimeout * 1000, 0, 0, 0, 0, 0, &res); + (wdog->timeout - pretimeout) * 1000, 0, 0, 0, + 0, 0, &res); if (res.a0) return -EACCES; -- cgit v1.2.3 From 81363f248aecd2b5f10547af268a4dfaf8963489 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 23 Oct 2019 15:41:59 +0300 Subject: watchdog: bd70528: Add MODULE_ALIAS to allow module auto loading The bd70528 watchdog driver is probed by MFD driver. Add MODULE_ALIAS in order to allow udev to load the module when MFD sub-device cell for watchdog is added. 
Fixes: bbc88a0ec9f37 ("watchdog: bd70528: Initial support for ROHM BD70528 watchdog block") Signed-off-by: Matti Vaittinen Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/bd70528_wdt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/bd70528_wdt.c b/drivers/watchdog/bd70528_wdt.c index b0152fef4fc7..bc60e036627a 100644 --- a/drivers/watchdog/bd70528_wdt.c +++ b/drivers/watchdog/bd70528_wdt.c @@ -288,3 +288,4 @@ module_platform_driver(bd70528_wdt); MODULE_AUTHOR("Matti Vaittinen "); MODULE_DESCRIPTION("BD70528 watchdog driver"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:bd70528-wdt"); -- cgit v1.2.3 From eaf072e512d54c95b0977eda06cbca3151ace1e5 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Mon, 4 Nov 2019 14:36:54 +0100 Subject: ASoC: stm32: sai: add restriction on mmap support Do not support mmap in S/PDIF mode. In S/PDIF mode the buffer has to be copied, to allow the channel status bits insertion. 
Signed-off-by: Olivier Moysan Link: https://lore.kernel.org/r/20191104133654.28750-1-olivier.moysan@st.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index a4060813bc74..48e629ac2d88 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -1218,6 +1218,16 @@ static int stm32_sai_pcm_process_spdif(struct snd_pcm_substream *substream, return 0; } +/* No support of mmap in S/PDIF mode */ +static const struct snd_pcm_hardware stm32_sai_pcm_hw_spdif = { + .info = SNDRV_PCM_INFO_INTERLEAVED, + .buffer_bytes_max = 8 * PAGE_SIZE, + .period_bytes_min = 1024, + .period_bytes_max = PAGE_SIZE, + .periods_min = 2, + .periods_max = 8, +}; + static const struct snd_pcm_hardware stm32_sai_pcm_hw = { .info = SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_MMAP, .buffer_bytes_max = 8 * PAGE_SIZE, @@ -1270,7 +1280,7 @@ static const struct snd_dmaengine_pcm_config stm32_sai_pcm_config = { }; static const struct snd_dmaengine_pcm_config stm32_sai_pcm_config_spdif = { - .pcm_hardware = &stm32_sai_pcm_hw, + .pcm_hardware = &stm32_sai_pcm_hw_spdif, .prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config, .process = stm32_sai_pcm_process_spdif, }; -- cgit v1.2.3 From e44f3d49f900c645af434a3a1dfdbfb79c4a7851 Mon Sep 17 00:00:00 2001 From: Yong Zhi Date: Mon, 4 Nov 2019 16:09:59 -0600 Subject: ASoC: max98373: replace gpio_request with devm_gpio_request Use devm_gpio_request() to automatic unroll when fails and avoid resource leaks at error paths. 
Signed-off-by: Yong Zhi Link: https://lore.kernel.org/r/1572905399-22402-1-git-send-email-yong.zhi@intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/max98373.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/max98373.c b/sound/soc/codecs/max98373.c index eb709d528259..cae1def8902d 100644 --- a/sound/soc/codecs/max98373.c +++ b/sound/soc/codecs/max98373.c @@ -960,11 +960,11 @@ static int max98373_i2c_probe(struct i2c_client *i2c, /* Power on device */ if (gpio_is_valid(max98373->reset_gpio)) { - ret = gpio_request(max98373->reset_gpio, "MAX98373_RESET"); + ret = devm_gpio_request(&i2c->dev, max98373->reset_gpio, + "MAX98373_RESET"); if (ret) { dev_err(&i2c->dev, "%s: Failed to request gpio %d\n", __func__, max98373->reset_gpio); - gpio_free(max98373->reset_gpio); return -EINVAL; } gpio_direction_output(max98373->reset_gpio, 0); -- cgit v1.2.3 From 9a11ba7388f165762549903492fc34d29bbb3c04 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Tue, 5 Nov 2019 18:10:53 +0200 Subject: ALSA: hda: hdmi - add Tigerlake support Add Tigerlake HDMI codec support. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=205379 BugLink: https://bugs.freedesktop.org/show_bug.cgi?id=112171 Cc: Pan Xiuli Signed-off-by: Kai Vehmanen Link: https://lore.kernel.org/r/20191105161053.22958-1-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index b72553710ffb..3c720703ebb8 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -2851,6 +2851,18 @@ static int patch_i915_icl_hdmi(struct hda_codec *codec) return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map)); } +static int patch_i915_tgl_hdmi(struct hda_codec *codec) +{ + /* + * pin to port mapping table where the value indicate the pin number and + * the index indicate the port number with 1 base. 
+ */ + static const int map[] = {0x4, 0x6, 0x8, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf}; + + return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map)); +} + + /* Intel Baytrail and Braswell; with eld notifier */ static int patch_i915_byt_hdmi(struct hda_codec *codec) { @@ -4153,6 +4165,7 @@ HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi), HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi), HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI", patch_i915_icl_hdmi), +HDA_CODEC_ENTRY(0x80862812, "Tigerlake HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi), HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI", patch_i915_byt_hdmi), HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI", patch_i915_byt_hdmi), -- cgit v1.2.3 From 0d6eeb1fd625272bd60d25f2d5e116cf582fc7dc Mon Sep 17 00:00:00 2001 From: Charles Machalow Date: Mon, 4 Nov 2019 22:15:10 -0800 Subject: nvme: change nvme_passthru_cmd64 to explicitly mark rsvd Changing nvme_passthru_cmd64 to add a field: rsvd2. This field is an explicit marker for the padding space added on certain platforms as a result of the enlargement of the result field from 32 bit to 64 bits in size, and fixes differences in struct size when using compat ioctl for 32-bit binaries on 64-bit architecture. 
Fixes: 65e68edce0db ("nvme: allow 64-bit results in passthru commands") Reviewed-by: Christoph Hellwig Signed-off-by: Charles Machalow [changelog] Signed-off-by: Keith Busch --- include/uapi/linux/nvme_ioctl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index e168dc59e9a0..d99b5a772698 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -63,6 +63,7 @@ struct nvme_passthru_cmd64 { __u32 cdw14; __u32 cdw15; __u32 timeout_ms; + __u32 rsvd2; __u64 result; }; -- cgit v1.2.3 From 0763b3e81a82477363d72548604455bf9468c2fa Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Sat, 2 Nov 2019 01:28:28 +0200 Subject: taprio: fix panic while hw offload sched list swap Don't swap oper and admin schedules too early, it's not correct and causes crash. Steps to reproduce: 1) tc qdisc replace dev eth0 parent root handle 100 taprio \ num_tc 3 \ map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \ queues 1@0 1@1 1@2 \ base-time $SOME_BASE_TIME \ sched-entry S 01 80000 \ sched-entry S 02 15000 \ sched-entry S 04 40000 \ flags 2 2) tc qdisc replace dev eth0 parent root handle 100 taprio \ base-time $SOME_BASE_TIME \ sched-entry S 01 90000 \ sched-entry S 02 20000 \ sched-entry S 04 40000 \ flags 2 3) tc qdisc replace dev eth0 parent root handle 100 taprio \ base-time $SOME_BASE_TIME \ sched-entry S 01 150000 \ sched-entry S 02 200000 \ sched-entry S 04 40000 \ flags 2 Do 2 3 2 .. steps more times if not happens and observe: [ 305.832319] Unable to handle kernel write to read-only memory at virtual address ffff0000087ce7f0 [ 305.910887] CPU: 0 PID: 0 Comm: swapper/0 Not tainted [ 305.919306] Hardware name: Texas Instruments AM654 Base Board (DT) [...] 
[ 306.017119] x1 : ffff800848031d88 x0 : ffff800848031d80 [ 306.022422] Call trace: [ 306.024866] taprio_free_sched_cb+0x4c/0x98 [ 306.029040] rcu_process_callbacks+0x25c/0x410 [ 306.033476] __do_softirq+0x10c/0x208 [ 306.037132] irq_exit+0xb8/0xc8 [ 306.040267] __handle_domain_irq+0x64/0xb8 [ 306.044352] gic_handle_irq+0x7c/0x178 [ 306.048092] el1_irq+0xb0/0x128 [ 306.051227] arch_cpu_idle+0x10/0x18 [ 306.054795] do_idle+0x120/0x138 [ 306.058015] cpu_startup_entry+0x20/0x28 [ 306.061931] rest_init+0xcc/0xd8 [ 306.065154] start_kernel+0x3bc/0x3e4 [ 306.068810] Code: f2fbd5b7 f2fbd5b6 d503201f f9400422 (f9000662) [ 306.074900] ---[ end trace 96c8e2284a9d9d6e ]--- [ 306.079507] Kernel panic - not syncing: Fatal exception in interrupt [ 306.085847] SMP: stopping secondary CPUs [ 306.089765] Kernel Offset: disabled Try to explain one of the possible crash cases: The "real" admin list is assigned when admin_sched is set to new_admin, it happens after "swap", that assigns to oper_sched NULL. Thus if call qdisc show it can crash. Farther, next second time, when sched list is updated, the admin_sched is not NULL and becomes the oper_sched, previous oper_sched was NULL so just skipped. But then admin_sched is assigned new_admin, but schedules to free previous assigned admin_sched (that already became oper_sched). Farther, next third time, when sched list is updated, while one more swap, oper_sched is not null, but it was happy to be freed already (while prev. admin update), so while try to free oper_sched the kernel panic happens at taprio_free_sched_cb(). So, move the "swap emulation" where it should be according to function comment from code. Fixes: 9c66d15646760e ("taprio: Add support for hardware offloading") Signed-off-by: Ivan Khoronzhuk Acked-by: Vinicius Costa Gomes Tested-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- net/sched/sch_taprio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 2121187229cd..7cd68628c637 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1224,8 +1224,6 @@ static int taprio_enable_offload(struct net_device *dev, goto done; } - taprio_offload_config_changed(q); - done: taprio_offload_free(offload); @@ -1505,6 +1503,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, call_rcu(&admin->rcu, taprio_free_sched_cb); spin_unlock_irqrestore(&q->current_entry_lock, flags); + + if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) + taprio_offload_config_changed(q); } new_admin = NULL; -- cgit v1.2.3 From 1899bb325149e481de31a4f32b59ea6f24e176ea Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Fri, 1 Nov 2019 21:56:42 -0700 Subject: bonding: fix state transition issue in link monitoring Since de77ecd4ef02 ("bonding: improve link-status update in mii-monitoring"), the bonding driver has utilized two separate variables to indicate the next link state a particular slave should transition to. Each is used to communicate to a different portion of the link state change commit logic; one to the bond_miimon_commit function itself, and another to the state transition logic. Unfortunately, the two variables can become unsynchronized, resulting in incorrect link state transitions within bonding. This can cause slaves to become stuck in an incorrect link state until a subsequent carrier state transition. The issue occurs when a special case in bond_slave_netdev_event sets slave->link directly to BOND_LINK_FAIL. On the next pass through bond_miimon_inspect after the slave goes carrier up, the BOND_LINK_FAIL case will set the proposed next state (link_new_state) to BOND_LINK_UP, but the new_link to BOND_LINK_DOWN. The setting of the final link state from new_link comes after that from link_new_state, and so the slave will end up incorrectly in _DOWN state. 
Resolve this by combining the two variables into one. Reported-by: Aleksei Zakharov Reported-by: Sha Zhang Cc: Mahesh Bandewar Fixes: de77ecd4ef02 ("bonding: improve link-status update in mii-monitoring") Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 44 ++++++++++++++++++++--------------------- include/net/bonding.h | 3 +-- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 480f9459b402..62f65573eb04 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2083,8 +2083,7 @@ static int bond_miimon_inspect(struct bonding *bond) ignore_updelay = !rcu_dereference(bond->curr_active_slave); bond_for_each_slave_rcu(bond, slave, iter) { - slave->new_link = BOND_LINK_NOCHANGE; - slave->link_new_state = slave->link; + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); link_state = bond_check_dev_link(bond, slave->dev, 0); @@ -2118,7 +2117,7 @@ static int bond_miimon_inspect(struct bonding *bond) } if (slave->delay <= 0) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; continue; } @@ -2155,7 +2154,7 @@ static int bond_miimon_inspect(struct bonding *bond) slave->delay = 0; if (slave->delay <= 0) { - slave->new_link = BOND_LINK_UP; + bond_propose_link_state(slave, BOND_LINK_UP); commit++; ignore_updelay = false; continue; @@ -2193,7 +2192,7 @@ static void bond_miimon_commit(struct bonding *bond) struct slave *slave, *primary; bond_for_each_slave(bond, slave, iter) { - switch (slave->new_link) { + switch (slave->link_new_state) { case BOND_LINK_NOCHANGE: /* For 802.3ad mode, check current slave speed and * duplex again in case its port was disabled after @@ -2265,8 +2264,8 @@ static void bond_miimon_commit(struct bonding *bond) default: slave_err(bond->dev, slave->dev, "invalid new link %d on slave\n", - slave->new_link); - slave->new_link = BOND_LINK_NOCHANGE; + 
slave->link_new_state); + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); continue; } @@ -2674,13 +2673,13 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) bond_for_each_slave_rcu(bond, slave, iter) { unsigned long trans_start = dev_trans_start(slave->dev); - slave->new_link = BOND_LINK_NOCHANGE; + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); if (slave->link != BOND_LINK_UP) { if (bond_time_in_interval(bond, trans_start, 1) && bond_time_in_interval(bond, slave->last_rx, 1)) { - slave->new_link = BOND_LINK_UP; + bond_propose_link_state(slave, BOND_LINK_UP); slave_state_changed = 1; /* primary_slave has no meaning in round-robin @@ -2705,7 +2704,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) if (!bond_time_in_interval(bond, trans_start, 2) || !bond_time_in_interval(bond, slave->last_rx, 2)) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); slave_state_changed = 1; if (slave->link_failure_count < UINT_MAX) @@ -2736,8 +2735,8 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) goto re_arm; bond_for_each_slave(bond, slave, iter) { - if (slave->new_link != BOND_LINK_NOCHANGE) - slave->link = slave->new_link; + if (slave->link_new_state != BOND_LINK_NOCHANGE) + slave->link = slave->link_new_state; } if (slave_state_changed) { @@ -2760,9 +2759,9 @@ re_arm: } /* Called to inspect slaves for active-backup mode ARP monitor link state - * changes. Sets new_link in slaves to specify what action should take - * place for the slave. Returns 0 if no changes are found, >0 if changes - * to link states must be committed. + * changes. Sets proposed link state in slaves to specify what action + * should take place for the slave. Returns 0 if no changes are found, >0 + * if changes to link states must be committed. * * Called with rcu_read_lock held. 
*/ @@ -2774,12 +2773,12 @@ static int bond_ab_arp_inspect(struct bonding *bond) int commit = 0; bond_for_each_slave_rcu(bond, slave, iter) { - slave->new_link = BOND_LINK_NOCHANGE; + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); last_rx = slave_last_rx(bond, slave); if (slave->link != BOND_LINK_UP) { if (bond_time_in_interval(bond, last_rx, 1)) { - slave->new_link = BOND_LINK_UP; + bond_propose_link_state(slave, BOND_LINK_UP); commit++; } continue; @@ -2807,7 +2806,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) if (!bond_is_active_slave(slave) && !rcu_access_pointer(bond->current_arp_slave) && !bond_time_in_interval(bond, last_rx, 3)) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; } @@ -2820,7 +2819,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) if (bond_is_active_slave(slave) && (!bond_time_in_interval(bond, trans_start, 2) || !bond_time_in_interval(bond, last_rx, 2))) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; } } @@ -2840,7 +2839,7 @@ static void bond_ab_arp_commit(struct bonding *bond) struct slave *slave; bond_for_each_slave(bond, slave, iter) { - switch (slave->new_link) { + switch (slave->link_new_state) { case BOND_LINK_NOCHANGE: continue; @@ -2890,8 +2889,9 @@ static void bond_ab_arp_commit(struct bonding *bond) continue; default: - slave_err(bond->dev, slave->dev, "impossible: new_link %d on slave\n", - slave->new_link); + slave_err(bond->dev, slave->dev, + "impossible: link_new_state %d on slave\n", + slave->link_new_state); continue; } diff --git a/include/net/bonding.h b/include/net/bonding.h index 1afc125014da..3d56b026bb9e 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -159,7 +159,6 @@ struct slave { unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS]; s8 link; /* one of BOND_LINK_XXXX */ s8 link_new_state; /* one of BOND_LINK_XXXX */ - s8 new_link; u8 backup:1, /* indicates backup slave. 
Value corresponds with BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ inactive:1, /* indicates inactive slave */ @@ -549,7 +548,7 @@ static inline void bond_propose_link_state(struct slave *slave, int state) static inline void bond_commit_link_state(struct slave *slave, bool notify) { - if (slave->link == slave->link_new_state) + if (slave->link_new_state == BOND_LINK_NOCHANGE) return; slave->link = slave->link_new_state; -- cgit v1.2.3 From 2ef17216d732f40dcd96423384064d542e3ff658 Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Sat, 2 Nov 2019 17:14:42 +0530 Subject: net: hns3: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in header files related to Hisilicon network devices. For C header files Documentation/process/license-rules.rst mandates C-like comments (opposed to C source files where C++ style should be used) Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46. Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 75ccc1e7076b..a0998937727d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright (c) 2016-2017 Hisilicon Limited. 
#ifndef __HNAE3_H diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 2110fa3b4479..5d468ed404a6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright (c) 2016-2017 Hisilicon Limited. #ifndef __HNS3_ENET_H diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 4821fe08b5e4..1426eb5ddf3d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright (c) 2016-2017 Hisilicon Limited. #ifndef __HCLGE_CMD_H diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h index 278f21e02736..b04702e65689 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright (c) 2016-2017 Hisilicon Limited. #ifndef __HCLGE_DCB_H__ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index c3d56b872ed7..59b824347ba4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright (c) 2016-2017 Hisilicon Limited. 
#ifndef __HCLGE_MAIN_H diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h index ef095d9c566f..dd9a1218a7b0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright (c) 2016-2017 Hisilicon Limited. #ifndef __HCLGE_MDIO_H diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 818610988d34..260f22d19d81 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright (c) 2016-2017 Hisilicon Limited. #ifndef __HCLGE_TM_H -- cgit v1.2.3 From 59eb87cb52c9f7164804bc8639c4d03ba9b0c169 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Sat, 2 Nov 2019 14:17:47 +0000 Subject: net: sched: prevent duplicate flower rules from tcf_proto destroy race When a new filter is added to cls_api, the function tcf_chain_tp_insert_unique() looks up the protocol/priority/chain to determine if the tcf_proto is duplicated in the chain's hashtable. It then creates a new entry or continues with an existing one. In cls_flower, this allows the function fl_ht_insert_unique to determine if a filter is a duplicate and reject appropriately, meaning that the duplicate will not be passed to drivers via the offload hooks. However, when a tcf_proto is destroyed it is removed from its chain before a hardware remove hook is hit. This can lead to a race whereby the driver has not received the remove message but duplicate flows can be accepted. This, in turn, can lead to the offload driver receiving incorrect duplicate flows and out of order add/delete messages.
Prevent duplicates by utilising an approach suggested by Vlad Buslov. A hash table per block stores each unique chain/protocol/prio being destroyed. This entry is only removed when the full destroy (and hardware offload) has completed. If a new flow is being added with the same identifiers as a tcf_proto being destroyed, then the add request is replayed until the destroy is complete. Fixes: 8b64678e0af8 ("net: sched: refactor tp insert/delete for concurrent execution") Signed-off-by: John Hurley Signed-off-by: Vlad Buslov Reviewed-by: Simon Horman Reported-by: Louis Peens Signed-off-by: David S. Miller --- include/net/sch_generic.h | 4 +++ net/sched/cls_api.c | 83 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 83 insertions(+), 4 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 637548d54b3e..d80acda231ae 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -362,6 +363,7 @@ struct tcf_proto { bool deleting; refcount_t refcnt; struct rcu_head rcu; + struct hlist_node destroy_ht_node; }; struct qdisc_skb_cb { @@ -414,6 +416,8 @@ struct tcf_block { struct list_head filter_chain_list; } chain0; struct rcu_head rcu; + DECLARE_HASHTABLE(proto_destroy_ht, 7); + struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */ }; #ifdef CONFIG_PROVE_LOCKING diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8717c0b26c90..20d60b8fcb70 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,62 @@ static LIST_HEAD(tcf_proto_base); /* Protects list of registered TC modules. It is pure SMP lock.
*/ static DEFINE_RWLOCK(cls_mod_lock); +static u32 destroy_obj_hashfn(const struct tcf_proto *tp) +{ + return jhash_3words(tp->chain->index, tp->prio, + (__force __u32)tp->protocol, 0); +} + +static void tcf_proto_signal_destroying(struct tcf_chain *chain, + struct tcf_proto *tp) +{ + struct tcf_block *block = chain->block; + + mutex_lock(&block->proto_destroy_lock); + hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node, + destroy_obj_hashfn(tp)); + mutex_unlock(&block->proto_destroy_lock); +} + +static bool tcf_proto_cmp(const struct tcf_proto *tp1, + const struct tcf_proto *tp2) +{ + return tp1->chain->index == tp2->chain->index && + tp1->prio == tp2->prio && + tp1->protocol == tp2->protocol; +} + +static bool tcf_proto_exists_destroying(struct tcf_chain *chain, + struct tcf_proto *tp) +{ + u32 hash = destroy_obj_hashfn(tp); + struct tcf_proto *iter; + bool found = false; + + rcu_read_lock(); + hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter, + destroy_ht_node, hash) { + if (tcf_proto_cmp(tp, iter)) { + found = true; + break; + } + } + rcu_read_unlock(); + + return found; +} + +static void +tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp) +{ + struct tcf_block *block = chain->block; + + mutex_lock(&block->proto_destroy_lock); + if (hash_hashed(&tp->destroy_ht_node)) + hash_del_rcu(&tp->destroy_ht_node); + mutex_unlock(&block->proto_destroy_lock); +} + /* Find classifier type by string name */ static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind) @@ -234,9 +291,11 @@ static void tcf_proto_get(struct tcf_proto *tp) static void tcf_chain_put(struct tcf_chain *chain); static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held, - struct netlink_ext_ack *extack) + bool sig_destroy, struct netlink_ext_ack *extack) { tp->ops->destroy(tp, rtnl_held, extack); + if (sig_destroy) + tcf_proto_signal_destroyed(tp->chain, tp); tcf_chain_put(tp->chain); module_put(tp->ops->owner); kfree_rcu(tp, rcu); 
@@ -246,7 +305,7 @@ static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { if (refcount_dec_and_test(&tp->refcnt)) - tcf_proto_destroy(tp, rtnl_held, extack); + tcf_proto_destroy(tp, rtnl_held, true, extack); } static int walker_check_empty(struct tcf_proto *tp, void *fh, @@ -370,6 +429,7 @@ static bool tcf_chain_detach(struct tcf_chain *chain) static void tcf_block_destroy(struct tcf_block *block) { mutex_destroy(&block->lock); + mutex_destroy(&block->proto_destroy_lock); kfree_rcu(block, rcu); } @@ -545,6 +605,12 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held) mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_dereference(chain->filter_chain, chain); + while (tp) { + tp_next = rcu_dereference_protected(tp->next, 1); + tcf_proto_signal_destroying(chain, tp); + tp = tp_next; + } + tp = tcf_chain_dereference(chain->filter_chain, chain); RCU_INIT_POINTER(chain->filter_chain, NULL); tcf_chain0_head_change(chain, NULL); chain->flushing = true; @@ -844,6 +910,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, return ERR_PTR(-ENOMEM); } mutex_init(&block->lock); + mutex_init(&block->proto_destroy_lock); init_rwsem(&block->cb_lock); flow_block_init(&block->flow_block); INIT_LIST_HEAD(&block->chain_list); @@ -1621,6 +1688,12 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, mutex_lock(&chain->filter_chain_lock); + if (tcf_proto_exists_destroying(chain, tp_new)) { + mutex_unlock(&chain->filter_chain_lock); + tcf_proto_destroy(tp_new, rtnl_held, false, NULL); + return ERR_PTR(-EAGAIN); + } + tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false); if (!tp) @@ -1628,10 +1701,10 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, mutex_unlock(&chain->filter_chain_lock); if (tp) { - tcf_proto_destroy(tp_new, rtnl_held, NULL); + tcf_proto_destroy(tp_new, rtnl_held, false, NULL); tp_new = tp; } else if (err) { - 
tcf_proto_destroy(tp_new, rtnl_held, NULL); + tcf_proto_destroy(tp_new, rtnl_held, false, NULL); tp_new = ERR_PTR(err); } @@ -1669,6 +1742,7 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, return; } + tcf_proto_signal_destroying(chain, tp); next = tcf_chain_dereference(chain_info.next, chain); if (tp == chain->filter_chain) tcf_chain0_head_change(chain, next); @@ -2188,6 +2262,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, err = -EINVAL; goto errout_locked; } else if (t->tcm_handle == 0) { + tcf_proto_signal_destroying(chain, tp); tcf_chain_tp_remove(chain, &chain_info, tp); mutex_unlock(&chain->filter_chain_lock); -- cgit v1.2.3 From e684000b8a2b1e14b9f8ebd72dfd998d44a864ca Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 2 Nov 2019 20:17:39 -0700 Subject: net: dsa: bcm_sf2: Fix driver removal With the DSA core doing the call to dsa_port_disable() we do not need to do that within the driver itself. This could cause a use-after-free, since past dsa_unregister_switch() we should not be accessing any dsa_switch internal structures. Fixes: 0394a63acfe2 ("net: dsa: enable and disable all ports") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S.
Miller --- drivers/net/dsa/bcm_sf2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index d44651ad520c..69fc13046ac7 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1215,10 +1215,10 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev) struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); priv->wol_ports_mask = 0; + /* Disable interrupts */ + bcm_sf2_intr_disable(priv); dsa_unregister_switch(priv->dev->ds); bcm_sf2_cfp_exit(priv->dev->ds); - /* Disable all ports and interrupts */ - bcm_sf2_sw_suspend(priv->dev->ds); bcm_sf2_mdio_unregister(priv); return 0; -- cgit v1.2.3 From 57d0f00dfeb3775eae88af1c4aeda6bd35943f20 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Sun, 3 Nov 2019 11:11:35 +0200 Subject: mlx4_core: fix wrong comment about the reason of subtract one from the max_cqes The reason for the pre-allocation of one CQE is to enable resizing of the CQ. Fix comment accordingly. Signed-off-by: Dotan Barak Signed-off-by: Eli Cohen Signed-off-by: Vladimir Sokolovsky Signed-off-by: Yuval Shaia Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index fce9b3a24347..69bb6bb06e76 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -514,8 +514,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; /* * Subtract 1 from the limit because we need to allocate a - * spare CQE so the HCA HW can tell the difference between an - * empty CQ and a full CQ. + * spare CQE to enable resizing the CQ. 
*/ dev->caps.max_cqes = dev_cap->max_cq_sz - 1; dev->caps.reserved_cqs = dev_cap->reserved_cqs; -- cgit v1.2.3 From 683916f6a84023407761d843048f1aea486b2612 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Nov 2019 15:36:57 -0800 Subject: net/tls: fix sk_msg trim on fallback to copy mode sk_msg_trim() tries to only update curr pointer if it falls into the trimmed region. The logic, however, does not take into account the pointer wrapping that sk_msg_iter_var_prev() does, nor (as John points out) the fact that msg->sg is a ring buffer. This means that when the message was trimmed completely, the new curr pointer would have the value of MAX_MSG_FRAGS - 1, which is neither smaller than any other value, nor would it actually be correct. Special case the trimming to 0 length a little bit and rework the comparison between curr and end to take into account wrapping. This bug caused the TLS code to not copy all of the message, if zero copy filled in fewer sg entries than memcopy would need. Big thanks to Alexander Potapenko for the non-KMSAN reproducer. v2: - take into account that msg->sg is a ring buffer (John). Link: https://lore.kernel.org/netdev/20191030160542.30295-1-jakub.kicinski@netronome.com/ (v1) Fixes: d829e9c4112b ("tls: convert to generic sk_msg interface") Reported-by: syzbot+f8495bff23a879a6d0bd@syzkaller.appspotmail.com Reported-by: syzbot+6f50c99e8f6194bf363f@syzkaller.appspotmail.com Co-developed-by: John Fastabend Signed-off-by: Jakub Kicinski Signed-off-by: John Fastabend Signed-off-by: David S.
Miller --- include/linux/skmsg.h | 9 ++++++--- net/core/skmsg.c | 20 +++++++++++++++----- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index e4b3fb4bb77c..ce7055259877 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -139,6 +139,11 @@ static inline void sk_msg_apply_bytes(struct sk_psock *psock, u32 bytes) } } +static inline u32 sk_msg_iter_dist(u32 start, u32 end) +{ + return end >= start ? end - start : end + (MAX_MSG_FRAGS - start); +} + #define sk_msg_iter_var_prev(var) \ do { \ if (var == 0) \ @@ -198,9 +203,7 @@ static inline u32 sk_msg_elem_used(const struct sk_msg *msg) if (sk_msg_full(msg)) return MAX_MSG_FRAGS; - return msg->sg.end >= msg->sg.start ? - msg->sg.end - msg->sg.start : - msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start); + return sk_msg_iter_dist(msg->sg.start, msg->sg.end); } static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index cf390e0aa73d..ad31e4e53d0a 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -270,18 +270,28 @@ void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len) msg->sg.data[i].length -= trim; sk_mem_uncharge(sk, trim); + /* Adjust copybreak if it falls into the trimmed part of last buf */ + if (msg->sg.curr == i && msg->sg.copybreak > msg->sg.data[i].length) + msg->sg.copybreak = msg->sg.data[i].length; out: - /* If we trim data before curr pointer update copybreak and current - * so that any future copy operations start at new copy location. + sk_msg_iter_var_next(i); + msg->sg.end = i; + + /* If we trim data a full sg elem before curr pointer update + * copybreak and current so that any future copy operations + * start at new copy location. * However trimed data that has not yet been used in a copy op * does not require an update. 
*/ - if (msg->sg.curr >= i) { + if (!msg->sg.size) { + msg->sg.curr = msg->sg.start; + msg->sg.copybreak = 0; + } else if (sk_msg_iter_dist(msg->sg.start, msg->sg.curr) >= + sk_msg_iter_dist(msg->sg.start, msg->sg.end)) { + sk_msg_iter_var_prev(i); msg->sg.curr = i; msg->sg.copybreak = msg->sg.data[i].length; } - sk_msg_iter_var_next(i); - msg->sg.end = i; } EXPORT_SYMBOL_GPL(sk_msg_trim); -- cgit v1.2.3 From e7a86c687e64ab24f88330ad24ecc9442ce40c5a Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Mon, 4 Nov 2019 17:54:22 -0700 Subject: net: qualcomm: rmnet: Fix potential UAF when unregistering During the exit/unregistration process of the RmNet driver, the function rmnet_unregister_real_device() is called to handle freeing the driver's internal state and removing the RX handler on the underlying physical device. However, the order of operations this function performs is wrong and can lead to a use after free of the rmnet_port structure. Before calling netdev_rx_handler_unregister(), this port structure is freed with kfree(). If packets are received on any RmNet devices before synchronize_net() completes, they will attempt to use this already-freed port structure when processing the packet. As such, before cleaning up any other internal state, the RX handler must be unregistered in order to guarantee that no further packets will arrive on the device. Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Sean Tranchetti Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 9c54b715228e..06de59521fc4 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -57,10 +57,10 @@ static int rmnet_unregister_real_device(struct net_device *real_dev, if (port->nr_rmnet_devs) return -EINVAL; - kfree(port); - netdev_rx_handler_unregister(real_dev); + kfree(port); + /* release reference on real_dev */ dev_put(real_dev); -- cgit v1.2.3 From f75359f3ac855940c5718af10ba089b8977bf339 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Nov 2019 21:38:43 -0800 Subject: net: prevent load/store tearing on sk->sk_stamp Add a couple of READ_ONCE() and WRITE_ONCE() to prevent load-tearing and store-tearing in sock_read_timestamp() and sock_write_timestamp() This might prevent another KCSAN report. Fixes: 3a0ed3e96197 ("sock: Make sock->sk_stamp thread-safe") Signed-off-by: Eric Dumazet Cc: Deepa Dinamani Acked-by: Deepa Dinamani Signed-off-by: David S. 
Miller --- include/net/sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 8f9adcfac41b..718e62fbe869 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2342,7 +2342,7 @@ static inline ktime_t sock_read_timestamp(struct sock *sk) return kt; #else - return sk->sk_stamp; + return READ_ONCE(sk->sk_stamp); #endif } @@ -2353,7 +2353,7 @@ static inline void sock_write_timestamp(struct sock *sk, ktime_t kt) sk->sk_stamp = kt; write_sequnlock(&sk->sk_stamp_seq); #else - sk->sk_stamp = kt; + WRITE_ONCE(sk->sk_stamp, kt); #endif } -- cgit v1.2.3 From 517ce4e93368938b204451285e53014549804868 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Tue, 5 Nov 2019 16:34:07 +0800 Subject: NFC: fdp: fix incorrect free object The address of fw_vsc_cfg is on stack. Releasing it with devm_kfree() is incorrect, which may result in a system crash or other security impacts. The expected object to free is *fw_vsc_cfg. Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- drivers/nfc/fdp/i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index 1cd113c8d7cb..ad0abb1f0bae 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -259,7 +259,7 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev, *fw_vsc_cfg, len); if (r) { - devm_kfree(dev, fw_vsc_cfg); + devm_kfree(dev, *fw_vsc_cfg); goto vsc_read_err; } } else { -- cgit v1.2.3 From 2836654a2735d3bc0479edd3ca7457d909b007ed Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 5 Nov 2019 14:13:48 +0200 Subject: Documentation: TLS: Add missing counter description Add TLS TX counter description for the handshake retransmitted packets that triggers the resync procedure then skip it, going into the regular transmit flow. 
Fixes: 46a3ea98074e ("net/mlx5e: kTLS, Enhance TX resync flow") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/tls-offload.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst index 0dd3f748239f..f914e81fd3a6 100644 --- a/Documentation/networking/tls-offload.rst +++ b/Documentation/networking/tls-offload.rst @@ -436,6 +436,10 @@ by the driver: encryption. * ``tx_tls_ooo`` - number of TX packets which were part of a TLS stream but did not arrive in the expected order. + * ``tx_tls_skip_no_sync_data`` - number of TX packets which were part of + a TLS stream and arrived out-of-order, but skipped the HW offload routine + and went to the regular transmit flow as they were retransmissions of the + connection handshake. * ``tx_tls_drop_no_sync_data`` - number of TX packets which were part of a TLS stream dropped, because they arrived out of order and associated record could not be found. -- cgit v1.2.3 From 832c4365bd42986b9495f4ea4c86668a78eacf28 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Fri, 4 Oct 2019 16:44:21 +0200 Subject: ARM: dts: stm32: relax qspi pins slew-rate for stm32mp157 Relax qspi pins slew-rate to minimize peak currents. 
Fixes: 844030057339 ("ARM: dts: stm32: add flash nor support on stm32mp157c eval board") Signed-off-by: Patrice Chotard Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp157-pinctrl.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi index e4a0d51ec3a8..0a3a7d66737b 100644 --- a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi +++ b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi @@ -609,13 +609,13 @@ ; /* QSPI_BK1_IO3 */ bias-disable; drive-push-pull; - slew-rate = <3>; + slew-rate = <1>; }; pins2 { pinmux = ; /* QSPI_BK1_NCS */ bias-pull-up; drive-push-pull; - slew-rate = <3>; + slew-rate = <1>; }; }; @@ -637,13 +637,13 @@ ; /* QSPI_BK2_IO3 */ bias-disable; drive-push-pull; - slew-rate = <3>; + slew-rate = <1>; }; pins2 { pinmux = ; /* QSPI_BK2_NCS */ bias-pull-up; drive-push-pull; - slew-rate = <3>; + slew-rate = <1>; }; }; -- cgit v1.2.3 From 9df50c2e16de7fd739d11d37303afec9e573b46f Mon Sep 17 00:00:00 2001 From: Christophe Roullier Date: Mon, 4 Nov 2019 15:31:45 +0100 Subject: ARM: dts: stm32: Fix CAN RAM mapping on stm32mp157c Split the 10Kbytes CAN message RAM to be able to use simultaneously FDCAN1 and FDCAN2 instances. First 5Kbytes are allocated to FDCAN1 and last 5Kbytes are used for FDCAN2. To do so, set the offset to 0x1400 in mram-cfg for FDCAN2. 
Fixes: d44d6e021301 ("ARM: dts: stm32: change CAN RAM mapping on stm32mp157c") Signed-off-by: Christophe Roullier Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp157c.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi index 9b11654a0a39..f98e0370c0bc 100644 --- a/arch/arm/boot/dts/stm32mp157c.dtsi +++ b/arch/arm/boot/dts/stm32mp157c.dtsi @@ -932,7 +932,7 @@ interrupt-names = "int0", "int1"; clocks = <&rcc CK_HSE>, <&rcc FDCAN_K>; clock-names = "hclk", "cclk"; - bosch,mram-cfg = <0x1400 0 0 32 0 0 2 2>; + bosch,mram-cfg = <0x0 0 0 32 0 0 2 2>; status = "disabled"; }; @@ -945,7 +945,7 @@ interrupt-names = "int0", "int1"; clocks = <&rcc CK_HSE>, <&rcc FDCAN_K>; clock-names = "hclk", "cclk"; - bosch,mram-cfg = <0x0 0 0 32 0 0 2 2>; + bosch,mram-cfg = <0x1400 0 0 32 0 0 2 2>; status = "disabled"; }; -- cgit v1.2.3 From afe3af89cd38ebab2e432a54590acb262c3e15ac Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 4 Nov 2019 11:55:28 +0100 Subject: ARM: dts: stm32: remove OV5640 pinctrl definition on stm32mp157c-ev1 "push-pull" configuration is now fully handled by the gpiolib and the STMFX pinctrl driver. There is no longer need to declare a pinctrl group to only configure "push-pull" setting for the line. It is done directly by the gpiolib. 
Fixes: a502b343ebd0 ("pinctrl: stmfx: update pinconf settings") Signed-off-by: Alexandre Torgue Signed-off-by: Amelie Delaunay Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp157c-ev1.dts | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp157c-ev1.dts b/arch/arm/boot/dts/stm32mp157c-ev1.dts index 89d29b50c3f4..9928d43233ff 100644 --- a/arch/arm/boot/dts/stm32mp157c-ev1.dts +++ b/arch/arm/boot/dts/stm32mp157c-ev1.dts @@ -183,14 +183,12 @@ ov5640: camera@3c { compatible = "ovti,ov5640"; - pinctrl-names = "default"; - pinctrl-0 = <&ov5640_pins>; reg = <0x3c>; clocks = <&clk_ext_camera>; clock-names = "xclk"; DOVDD-supply = <&v2v8>; - powerdown-gpios = <&stmfx_pinctrl 18 GPIO_ACTIVE_HIGH>; - reset-gpios = <&stmfx_pinctrl 19 GPIO_ACTIVE_LOW>; + powerdown-gpios = <&stmfx_pinctrl 18 (GPIO_ACTIVE_HIGH | GPIO_PUSH_PULL)>; + reset-gpios = <&stmfx_pinctrl 19 (GPIO_ACTIVE_LOW | GPIO_PUSH_PULL)>; rotation = <180>; status = "okay"; @@ -226,12 +224,6 @@ drive-push-pull; bias-pull-down; }; - - ov5640_pins: camera { - pins = "agpio2", "agpio3"; /* stmfx pins 18 & 19 */ - drive-push-pull; - output-low; - }; }; }; }; -- cgit v1.2.3 From f4d6e0f79bcde7810890563bac8e0f3479fe6d03 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 4 Nov 2019 11:55:29 +0100 Subject: ARM: dts: stm32: change joystick pinctrl definition on stm32mp157c-ev1 Pins used for joystick are all configured as input. "push-pull" is not a valid setting for an input pin. 
Fixes: a502b343ebd0 ("pinctrl: stmfx: update pinconf settings") Signed-off-by: Alexandre Torgue Signed-off-by: Amelie Delaunay Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp157c-ev1.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/stm32mp157c-ev1.dts b/arch/arm/boot/dts/stm32mp157c-ev1.dts index 9928d43233ff..91fc0a315c49 100644 --- a/arch/arm/boot/dts/stm32mp157c-ev1.dts +++ b/arch/arm/boot/dts/stm32mp157c-ev1.dts @@ -221,7 +221,6 @@ joystick_pins: joystick { pins = "gpio0", "gpio1", "gpio2", "gpio3", "gpio4"; - drive-push-pull; bias-pull-down; }; }; -- cgit v1.2.3 From 9508ef5a980f5d847cad9b932b6ada8f2a3466c1 Mon Sep 17 00:00:00 2001 From: Dragos Tarcatu Date: Wed, 6 Nov 2019 08:58:16 -0600 Subject: ASoC: SOF: topology: Fix bytes control size checks When using the example SOF amp widget topology, KASAN dumps this when the AMP bytes kcontrol gets loaded: [ 9.579548] BUG: KASAN: slab-out-of-bounds in sof_control_load+0x8cc/0xac0 [snd_sof] [ 9.588194] Write of size 40 at addr ffff8882314559dc by task systemd-udevd/2411 Fix that by rejecting the topology if the bytes data size > max_size Fixes: 311ce4fe7637d ("ASoC: SOF: Add support for loading topologies") Reviewed-by: Jaska Uimonen Reviewed-by: Ranjani Sridharan Signed-off-by: Dragos Tarcatu Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191106145816.9367-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/topology.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c index 0aabb3190ddc..4452594c2e17 100644 --- a/sound/soc/sof/topology.c +++ b/sound/soc/sof/topology.c @@ -543,15 +543,16 @@ static int sof_control_load_bytes(struct snd_soc_component *scomp, struct soc_bytes_ext *sbe = (struct soc_bytes_ext *)kc->private_value; int max_size = sbe->max; - if (le32_to_cpu(control->priv.size) > max_size) { + /* init the get/put bytes data */ + 
scontrol->size = sizeof(struct sof_ipc_ctrl_data) + + le32_to_cpu(control->priv.size); + + if (scontrol->size > max_size) { dev_err(sdev->dev, "err: bytes data size %d exceeds max %d.\n", - control->priv.size, max_size); + scontrol->size, max_size); return -EINVAL; } - /* init the get/put bytes data */ - scontrol->size = sizeof(struct sof_ipc_ctrl_data) + - le32_to_cpu(control->priv.size); scontrol->control_data = kzalloc(max_size, GFP_KERNEL); cdata = scontrol->control_data; if (!scontrol->control_data) -- cgit v1.2.3 From 002d3c65ee81a604430da61e20de7a5b32a0afd5 Mon Sep 17 00:00:00 2001 From: Jayachandran C Date: Wed, 6 Nov 2019 09:22:03 +0530 Subject: MAINTAINERS: update Cavium ThunderX2 maintainers jnair is no longer at caviumnetworks.com (or at marvell.com). This also means that Cavium ThunderX2 will now be maintained by Robert. This is probably a good time to map various email addresses used for my patches to my personal email ID, update .mailmap to do this. Link: https://lore.kernel.org/r/20191106035203.5389-1-c.jayachandran@gmail.com Signed-off-by: Jayachandran C Acked-by: Robert Richter Signed-off-by: Olof Johansson --- .mailmap | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index f652f3725772..e4c8f09525f5 100644 --- a/.mailmap +++ b/.mailmap @@ -108,6 +108,10 @@ Jason Gunthorpe Jason Gunthorpe Javi Merino +Jayachandran C +Jayachandran C +Jayachandran C +Jayachandran C Jean Tourrilhes Jeff Garzik diff --git a/MAINTAINERS b/MAINTAINERS index 94edb673dca5..66c4e189d8c0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3741,7 +3741,6 @@ F: drivers/crypto/cavium/cpt/ CAVIUM THUNDERX2 ARM64 SOC M: Robert Richter -M: Jayachandran C L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm64/boot/dts/cavium/thunder2-99xx* -- cgit v1.2.3 From 7961eee3978475fd9e8626137f88595b1ca05856 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Tue, 5 Nov 2019 21:16:21 -0800 
Subject: mm: memcontrol: fix NULL-ptr deref in percpu stats flush __mem_cgroup_free() can be called on the failure path in mem_cgroup_alloc(). However memcg_flush_percpu_vmstats() and memcg_flush_percpu_vmevents() which are called from __mem_cgroup_free() access the fields of memcg which can potentially be null if called from failure path from mem_cgroup_alloc(). Indeed syzbot has reported the following crash: kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 30393 Comm: syz-executor.1 Not tainted 5.4.0-rc2+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:memcg_flush_percpu_vmstats+0x4ae/0x930 mm/memcontrol.c:3436 Code: 05 41 89 c0 41 0f b6 04 24 41 38 c7 7c 08 84 c0 0f 85 5d 03 00 00 44 3b 05 33 d5 12 08 0f 83 e2 00 00 00 4c 89 f0 48 c1 e8 03 <42> 80 3c 28 00 0f 85 91 03 00 00 48 8b 85 10 fe ff ff 48 8b b0 90 RSP: 0018:ffff888095c27980 EFLAGS: 00010206 RAX: 0000000000000012 RBX: ffff888095c27b28 RCX: ffffc90008192000 RDX: 0000000000040000 RSI: ffffffff8340fae7 RDI: 0000000000000007 RBP: ffff888095c27be0 R08: 0000000000000000 R09: ffffed1013f0da33 R10: ffffed1013f0da32 R11: ffff88809f86d197 R12: fffffbfff138b760 R13: dffffc0000000000 R14: 0000000000000090 R15: 0000000000000007 FS: 00007f5027170700(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000710158 CR3: 00000000a7b18000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __mem_cgroup_free+0x1a/0x190 mm/memcontrol.c:5021 mem_cgroup_free mm/memcontrol.c:5033 [inline] mem_cgroup_css_alloc+0x3a1/0x1ae0 mm/memcontrol.c:5160 css_create kernel/cgroup/cgroup.c:5156 [inline] cgroup_apply_control_enable+0x44d/0xc40 kernel/cgroup/cgroup.c:3119 cgroup_mkdir+0x899/0x11b0 
kernel/cgroup/cgroup.c:5401 kernfs_iop_mkdir+0x14d/0x1d0 fs/kernfs/dir.c:1124 vfs_mkdir+0x42e/0x670 fs/namei.c:3807 do_mkdirat+0x234/0x2a0 fs/namei.c:3830 __do_sys_mkdir fs/namei.c:3846 [inline] __se_sys_mkdir fs/namei.c:3844 [inline] __x64_sys_mkdir+0x5c/0x80 fs/namei.c:3844 do_syscall_64+0xfa/0x760 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixing this by moving the flush to mem_cgroup_free as there is no need to flush anything if we see failure in mem_cgroup_alloc(). Link: http://lkml.kernel.org/r/20191018165231.249872-1-shakeelb@google.com Fixes: bb65f89b7d3d ("mm: memcontrol: flush percpu vmevents before releasing memcg") Fixes: c350a99ea2b1 ("mm: memcontrol: flush percpu vmstats before releasing memcg") Signed-off-by: Shakeel Butt Reported-by: syzbot+515d5bcfe179cdf049b2@syzkaller.appspotmail.com Reviewed-by: Roman Gushchin Cc: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 363106578876..0507b1cfd7e8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5014,12 +5014,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) { int node; - /* - * Flush percpu vmstats and vmevents to guarantee the value correctness - * on parent's and all ancestor levels. - */ - memcg_flush_percpu_vmstats(memcg, false); - memcg_flush_percpu_vmevents(memcg); for_each_node(node) free_mem_cgroup_per_node_info(memcg, node); free_percpu(memcg->vmstats_percpu); @@ -5030,6 +5024,12 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) static void mem_cgroup_free(struct mem_cgroup *memcg) { memcg_wb_domain_exit(memcg); + /* + * Flush percpu vmstats and vmevents to guarantee the value correctness + * on parent's and all ancestor levels. 
+ */ + memcg_flush_percpu_vmstats(memcg, false); + memcg_flush_percpu_vmevents(memcg); __mem_cgroup_free(memcg); } -- cgit v1.2.3 From 64801d19eba156170340c76f70ade743defcb8ce Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Tue, 5 Nov 2019 21:16:24 -0800 Subject: mm/gup_benchmark: fix MAP_HUGETLB case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MAP_HUGETLB ("-H" option) of gup_benchmark fails: $ sudo ./gup_benchmark -H mmap: Invalid argument This is because gup_benchmark.c is passing in a file descriptor to mmap(), but the fd came from opening up the /dev/zero file. This confuses the mmap syscall implementation, which thinks that, if the caller did not specify MAP_ANONYMOUS, then the file must be a huge page file. So it attempts to verify that the file really is a huge page file, as you can see here: ksys_mmap_pgoff() { if (!(flags & MAP_ANONYMOUS)) { retval = -EINVAL; if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file))) goto out_fput; /* THIS IS WHERE WE END UP */ else if (flags & MAP_HUGETLB) { ...proceed normally, /dev/zero is ok here... ...and of course is_file_hugepages() returns "false" for the /dev/zero file. The problem is that the user space program, gup_benchmark.c, really just wants anonymous memory here. The simplest way to get that is to pass MAP_ANONYMOUS whenever MAP_HUGETLB is specified, so that's what this patch does. 
Link: http://lkml.kernel.org/r/20191021212435.398153-2-jhubbard@nvidia.com Signed-off-by: John Hubbard Reviewed-by: Andrew Morton Reviewed-by: Jérôme Glisse Cc: Keith Busch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/gup_benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c index cb3fc09645c4..485cf06ef013 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -71,7 +71,7 @@ int main(int argc, char **argv) flags |= MAP_SHARED; break; case 'H': - flags |= MAP_HUGETLB; + flags |= (MAP_HUGETLB | MAP_ANONYMOUS); break; default: return -1; -- cgit v1.2.3 From 3e8fc0075e24338b1117cdff6a79477427b8dbed Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 5 Nov 2019 21:16:27 -0800 Subject: mm, meminit: recalculate pcpu batch and high limits after init completes Deferred memory initialisation updates zone->managed_pages during the initialisation phase but before that finishes, the per-cpu page allocator (pcpu) calculates the number of pages allocated/freed in batches as well as the maximum number of pages allowed on a per-cpu list. As zone->managed_pages is not up to date yet, the pcpu initialisation calculates inappropriately low batch and high values. This increases zone lock contention quite severely in some cases with the degree of severity depending on how many CPUs share a local zone and the size of the zone. A private report indicated that kernel build times were excessive with extremely high system CPU usage. A perf profile indicated that a large chunk of time was lost on zone->lock contention. This patch recalculates the pcpu batch and high values after deferred initialisation completes for every populated zone in the system. It was tested on a 2-socket AMD EPYC 2 machine using a kernel compilation workload -- allmodconfig and all available CPUs. 
mmtests configuration: config-workload-kernbench-max Configuration was modified to build on a fresh XFS partition. kernbench 5.4.0-rc3 5.4.0-rc3 vanilla resetpcpu-v2 Amean user-256 13249.50 ( 0.00%) 16401.31 * -23.79%* Amean syst-256 14760.30 ( 0.00%) 4448.39 * 69.86%* Amean elsp-256 162.42 ( 0.00%) 119.13 * 26.65%* Stddev user-256 42.97 ( 0.00%) 19.15 ( 55.43%) Stddev syst-256 336.87 ( 0.00%) 6.71 ( 98.01%) Stddev elsp-256 2.46 ( 0.00%) 0.39 ( 84.03%) 5.4.0-rc3 5.4.0-rc3 vanilla resetpcpu-v2 Duration User 39766.24 49221.79 Duration System 44298.10 13361.67 Duration Elapsed 519.11 388.87 The patch reduces system CPU usage by 69.86% and total build time by 26.65%. The variance of system CPU usage is also much reduced. Before, the breakdown of batch and high values over all zones was: 256 batch: 1 256 batch: 63 512 batch: 7 256 high: 0 256 high: 378 512 high: 42 512 pcpu pagesets had a batch limit of 7 and a high limit of 42. After the patch: 256 batch: 1 768 batch: 63 256 high: 0 768 high: 378 [mgorman@techsingularity.net: fix merge/linkage snafu] Link: http://lkml.kernel.org/r/20191023084705.GD3016@techsingularity.net Link: http://lkml.kernel.org/r/20191021094808.28824-2-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: David Hildenbrand Cc: Matt Fleming Cc: Thomas Gleixner Cc: Borislav Petkov Cc: Qian Cai Cc: [4.1+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ecc3dbad606b..6c717ad5f5c5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1947,6 +1947,14 @@ void __init page_alloc_init_late(void) /* Block until all are initialised */ wait_for_completion(&pgdat_init_all_done_comp); + /* + * The number of managed pages has changed due to the initialisation + * so the pcpu batch and high limits needs to be updated or the limits + * will be 
artificially small. + */ + for_each_populated_zone(zone) + zone_pcp_update(zone); + /* * We initialized the rest of the deferred pages. Permanently disable * on-demand struct page initialization. @@ -8514,7 +8522,6 @@ void free_contig_range(unsigned long pfn, unsigned int nr_pages) WARN(count != 0, "%d pages are still in use!\n", count); } -#ifdef CONFIG_MEMORY_HOTPLUG /* * The zone indicated has a new number of managed_pages; batch sizes and percpu * page high values need to be recalulated. @@ -8528,7 +8535,6 @@ void __meminit zone_pcp_update(struct zone *zone) per_cpu_ptr(zone->pageset, cpu)); mutex_unlock(&pcp_batch_high_lock); } -#endif void zone_pcp_reset(struct zone *zone) { -- cgit v1.2.3 From 169226f7e0d275c1879551f37484ef6683579a5c Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 5 Nov 2019 21:16:30 -0800 Subject: mm: thp: handle page cache THP correctly in PageTransCompoundMap We have a usecase to use tmpfs as QEMU memory backend and we would like to take the advantage of THP as well. But, our test shows the EPT is not PMD mapped even though the underlying THP are PMD mapped on host. The number showed by /sys/kernel/debug/kvm/largepage is much less than the number of PMD mapped shmem pages as the below: 7f2778200000-7f2878200000 rw-s 00000000 00:14 262232 /dev/shm/qemu_back_mem.mem.Hz2hSf (deleted) Size: 4194304 kB [snip] AnonHugePages: 0 kB ShmemPmdMapped: 579584 kB [snip] Locked: 0 kB cat /sys/kernel/debug/kvm/largepages 12 And some benchmarks do worse than with anonymous THPs. By digging into the code we figured out that commit 127393fbe597 ("mm: thp: kvm: fix memory corruption in KVM with THP enabled") checks if there is a single PTE mapping on the page for anonymous THP when setting up EPT map. But the _mapcount < 0 check doesn't work for page cache THP since every subpage of page cache THP would get _mapcount inc'ed once it is PMD mapped, so PageTransCompoundMap() always returns false for page cache THP. 
This would prevent KVM from setting up a PMD mapped EPT entry. So we need to handle page cache THP correctly. However, when a page cache THP's PMD gets split, the kernel just removes the mapping instead of setting up PTE mappings like it does for anonymous THP. Before KVM calls get_user_pages() the subpages may get PTE mapped even though it is still a THP, since the page cache THP may be mapped by other processes in the meantime. So check its _mapcount and whether the THP is PTE mapped or not. Although this may report some false negative cases (PTE mapped by other processes), it does not look trivial to make this accurate. With this fix /sys/kernel/debug/kvm/largepages shows that a reasonable number of pages are PMD mapped by EPT, as below: 7fbeaee00000-7fbfaee00000 rw-s 00000000 00:14 275464 /dev/shm/qemu_back_mem.mem.SKUvat (deleted) Size: 4194304 kB [snip] AnonHugePages: 0 kB ShmemPmdMapped: 557056 kB [snip] Locked: 0 kB cat /sys/kernel/debug/kvm/largepages 271 And the benchmarks perform the same as with anonymous THPs. [yang.shi@linux.alibaba.com: v4] Link: http://lkml.kernel.org/r/1571865575-42913-1-git-send-email-yang.shi@linux.alibaba.com Link: http://lkml.kernel.org/r/1571769577-89735-1-git-send-email-yang.shi@linux.alibaba.com Fixes: dd78fedde4b9 ("rmap: support file thp") Signed-off-by: Yang Shi Reported-by: Gang Deng Tested-by: Gang Deng Suggested-by: Hugh Dickins Acked-by: Kirill A. 
Shutemov Cc: Andrea Arcangeli Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 ----- include/linux/mm_types.h | 5 +++++ include/linux/page-flags.h | 20 ++++++++++++++++++-- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index cc292273e6ba..a2adf95b3f9c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -695,11 +695,6 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags) extern void kvfree(const void *addr); -static inline atomic_t *compound_mapcount_ptr(struct page *page) -{ - return &page[1].compound_mapcount; -} - static inline int compound_mapcount(struct page *page) { VM_BUG_ON_PAGE(!PageCompound(page), page); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2222fa795284..270aa8fd2800 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -221,6 +221,11 @@ struct page { #endif } _struct_page_alignment; +static inline atomic_t *compound_mapcount_ptr(struct page *page) +{ + return &page[1].compound_mapcount; +} + /* * Used for sizing the vmemmap region on some architectures */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f91cb8898ff0..1bf83c8fcaa7 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -622,12 +622,28 @@ static inline int PageTransCompound(struct page *page) * * Unlike PageTransCompound, this is safe to be called only while * split_huge_pmd() cannot run from under us, like if protected by the - * MMU notifier, otherwise it may result in page->_mapcount < 0 false + * MMU notifier, otherwise it may result in page->_mapcount check false * positives. + * + * We have to treat page cache THP differently since every subpage of it + * would get _mapcount inc'ed once it is PMD mapped. But, it may be PTE + * mapped in the current process so comparing subpage's _mapcount to + * compound_mapcount to filter out PTE mapped case. 
*/ static inline int PageTransCompoundMap(struct page *page) { - return PageTransCompound(page) && atomic_read(&page->_mapcount) < 0; + struct page *head; + + if (!PageTransCompound(page)) + return 0; + + if (PageAnon(page)) + return atomic_read(&page->_mapcount) < 0; + + head = compound_head(page); + /* File THP is PMD mapped and not PTE mapped */ + return atomic_read(&page->_mapcount) == + atomic_read(compound_mapcount_ptr(head)); } /* -- cgit v1.2.3 From e74540b285569d2b1e14fe7aee92297078f235ce Mon Sep 17 00:00:00 2001 From: Shuning Zhang Date: Tue, 5 Nov 2019 21:16:34 -0800 Subject: ocfs2: protect extent tree in ocfs2_prepare_inode_for_write() When the extent tree is modified, it should be protected by inode cluster lock and ip_alloc_sem. The extent tree is accessed and modified in the ocfs2_prepare_inode_for_write, but isn't protected by ip_alloc_sem. The following is a case. The function ocfs2_fiemap is accessing the extent tree, which is modified at the same time. kernel BUG at fs/ocfs2/extent_map.c:475! invalid opcode: 0000 [#1] SMP Modules linked in: tun ocfs2 ocfs2_nodemanager configfs ocfs2_stackglue [...] CPU: 16 PID: 14047 Comm: o2info Not tainted 4.1.12-124.23.1.el6uek.x86_64 #2 Hardware name: Oracle Corporation ORACLE SERVER X7-2L/ASM, MB MECH, X7-2L, BIOS 42040600 10/19/2018 task: ffff88019487e200 ti: ffff88003daa4000 task.ti: ffff88003daa4000 RIP: ocfs2_get_clusters_nocache.isra.11+0x390/0x550 [ocfs2] Call Trace: ocfs2_fiemap+0x1e3/0x430 [ocfs2] do_vfs_ioctl+0x155/0x510 SyS_ioctl+0x81/0xa0 system_call_fastpath+0x18/0xd8 Code: 18 48 c7 c6 60 7f 65 a0 31 c0 bb e2 ff ff ff 48 8b 4a 40 48 8b 7a 28 48 c7 c2 78 2d 66 a0 e8 38 4f 05 00 e9 28 fe ff ff 0f 1f 00 <0f> 0b 66 0f 1f 44 00 00 bb 86 ff ff ff e9 13 fe ff ff 66 0f 1f RIP ocfs2_get_clusters_nocache.isra.11+0x390/0x550 [ocfs2] ---[ end trace c8aa0c8180e869dc ]--- Kernel panic - not syncing: Fatal exception Kernel Offset: disabled This issue can be reproduced every week in a production environment. 
This issue is related to the usage mode. If others use ocfs2 in this mode, the kernel will panic frequently. [akpm@linux-foundation.org: coding style fixes] [Fix new warning due to unused function by removing said function - Linus ] Link: http://lkml.kernel.org/r/1568772175-2906-2-git-send-email-sunny.s.zhang@oracle.com Signed-off-by: Shuning Zhang Reviewed-by: Junxiao Bi Reviewed-by: Gang He Cc: Mark Fasheh Cc: Joel Becker Cc: Joseph Qi Cc: Changwei Ge Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 134 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 90 insertions(+), 44 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 53939bf9d7d2..9876db52913a 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2098,53 +2098,89 @@ static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos) return 0; } -static int ocfs2_prepare_inode_for_refcount(struct inode *inode, - struct file *file, - loff_t pos, size_t count, - int *meta_level) +static int ocfs2_inode_lock_for_extent_tree(struct inode *inode, + struct buffer_head **di_bh, + int meta_level, + int overwrite_io, + int write_sem, + int wait) { - int ret; - struct buffer_head *di_bh = NULL; - u32 cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits; - u32 clusters = - ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos; + int ret = 0; - ret = ocfs2_inode_lock(inode, &di_bh, 1); - if (ret) { - mlog_errno(ret); + if (wait) + ret = ocfs2_inode_lock(inode, NULL, meta_level); + else + ret = ocfs2_try_inode_lock(inode, + overwrite_io ? 
NULL : di_bh, meta_level); + if (ret < 0) goto out; + + if (wait) { + if (write_sem) + down_write(&OCFS2_I(inode)->ip_alloc_sem); + else + down_read(&OCFS2_I(inode)->ip_alloc_sem); + } else { + if (write_sem) + ret = down_write_trylock(&OCFS2_I(inode)->ip_alloc_sem); + else + ret = down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem); + + if (!ret) { + ret = -EAGAIN; + goto out_unlock; + } } - *meta_level = 1; + return ret; - ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX); - if (ret) - mlog_errno(ret); +out_unlock: + brelse(*di_bh); + ocfs2_inode_unlock(inode, meta_level); out: - brelse(di_bh); return ret; } +static void ocfs2_inode_unlock_for_extent_tree(struct inode *inode, + struct buffer_head **di_bh, + int meta_level, + int write_sem) +{ + if (write_sem) + up_write(&OCFS2_I(inode)->ip_alloc_sem); + else + up_read(&OCFS2_I(inode)->ip_alloc_sem); + + brelse(*di_bh); + *di_bh = NULL; + + if (meta_level >= 0) + ocfs2_inode_unlock(inode, meta_level); +} + static int ocfs2_prepare_inode_for_write(struct file *file, loff_t pos, size_t count, int wait) { int ret = 0, meta_level = 0, overwrite_io = 0; + int write_sem = 0; struct dentry *dentry = file->f_path.dentry; struct inode *inode = d_inode(dentry); struct buffer_head *di_bh = NULL; + u32 cpos; + u32 clusters; /* * We start with a read level meta lock and only jump to an ex * if we need to make modifications here. */ for(;;) { - if (wait) - ret = ocfs2_inode_lock(inode, NULL, meta_level); - else - ret = ocfs2_try_inode_lock(inode, - overwrite_io ? 
NULL : &di_bh, meta_level); + ret = ocfs2_inode_lock_for_extent_tree(inode, + &di_bh, + meta_level, + overwrite_io, + write_sem, + wait); if (ret < 0) { - meta_level = -1; if (ret != -EAGAIN) mlog_errno(ret); goto out; @@ -2156,15 +2192,8 @@ static int ocfs2_prepare_inode_for_write(struct file *file, */ if (!wait && !overwrite_io) { overwrite_io = 1; - if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) { - ret = -EAGAIN; - goto out_unlock; - } ret = ocfs2_overwrite_io(inode, di_bh, pos, count); - brelse(di_bh); - di_bh = NULL; - up_read(&OCFS2_I(inode)->ip_alloc_sem); if (ret < 0) { if (ret != -EAGAIN) mlog_errno(ret); @@ -2183,7 +2212,10 @@ static int ocfs2_prepare_inode_for_write(struct file *file, * set inode->i_size at the end of a write. */ if (should_remove_suid(dentry)) { if (meta_level == 0) { - ocfs2_inode_unlock(inode, meta_level); + ocfs2_inode_unlock_for_extent_tree(inode, + &di_bh, + meta_level, + write_sem); meta_level = 1; continue; } @@ -2197,18 +2229,32 @@ static int ocfs2_prepare_inode_for_write(struct file *file, ret = ocfs2_check_range_for_refcount(inode, pos, count); if (ret == 1) { - ocfs2_inode_unlock(inode, meta_level); - meta_level = -1; - - ret = ocfs2_prepare_inode_for_refcount(inode, - file, - pos, - count, - &meta_level); + ocfs2_inode_unlock_for_extent_tree(inode, + &di_bh, + meta_level, + write_sem); + ret = ocfs2_inode_lock_for_extent_tree(inode, + &di_bh, + meta_level, + overwrite_io, + 1, + wait); + write_sem = 1; + if (ret < 0) { + if (ret != -EAGAIN) + mlog_errno(ret); + goto out; + } + + cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits; + clusters = + ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos; + ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX); } if (ret < 0) { - mlog_errno(ret); + if (ret != -EAGAIN) + mlog_errno(ret); goto out_unlock; } @@ -2219,10 +2265,10 @@ out_unlock: trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno, pos, count, wait); - brelse(di_bh); - - if 
(meta_level >= 0) - ocfs2_inode_unlock(inode, meta_level); + ocfs2_inode_unlock_for_extent_tree(inode, + &di_bh, + meta_level, + write_sem); out: return ret; -- cgit v1.2.3 From df2ec7641bd03624a7e54cc926e8c3f75c7a84d8 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 5 Nov 2019 21:16:37 -0800 Subject: mm/mmu_notifiers: use the right return code for WARN_ON The return code from the op callback is actually in _ret, while the WARN_ON was checking ret which causes it to misfire. Link: http://lkml.kernel.org/r/20191025175502.GA31127@ziepe.ca Fixes: 8402ce61bec2 ("mm/mmu_notifiers: check if mmu notifier callbacks are allowed to fail") Signed-off-by: Jason Gunthorpe Reviewed-by: Andrew Morton Cc: Daniel Vetter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmu_notifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 7fde88695f35..9a889e456168 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -180,7 +180,7 @@ int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) mn->ops->invalidate_range_start, _ret, !mmu_notifier_range_blockable(range) ? "non-" : ""); WARN_ON(mmu_notifier_range_blockable(range) || - ret != -EAGAIN); + _ret != -EAGAIN); ret = _ret; } } -- cgit v1.2.3 From abaed0112c1db08be15a784a2c5c8a8b3063cdd3 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 5 Nov 2019 21:16:40 -0800 Subject: mm, vmstat: hide /proc/pagetypeinfo from normal users /proc/pagetypeinfo is a debugging tool to examine internal page allocator state wrt to fragmentation. It is not very useful for any other use so normal users really do not need to read this file. Waiman Long has noticed that reading this file can have negative side effects because zone->lock is necessary for gathering data and that a) interferes with the page allocator and its users and b) can lead to hard lockups on large machines which have very long free_list. 
Reduce both issues by simply not exporting the file to regular users. Link: http://lkml.kernel.org/r/20191025072610.18526-2-mhocko@kernel.org Fixes: 467c996c1e19 ("Print out statistics in relation to fragmentation avoidance to /proc/pagetypeinfo") Signed-off-by: Michal Hocko Reported-by: Waiman Long Acked-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Waiman Long Acked-by: Rafael Aquini Acked-by: David Rientjes Reviewed-by: Andrew Morton Cc: David Hildenbrand Cc: Johannes Weiner Cc: Roman Gushchin Cc: Konstantin Khlebnikov Cc: Jann Horn Cc: Song Liu Cc: Greg Kroah-Hartman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmstat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 6afc892a148a..4e885ecd44d1 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1972,7 +1972,7 @@ void __init init_mm_internals(void) #endif #ifdef CONFIG_PROC_FS proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op); - proc_create_seq("pagetypeinfo", 0444, NULL, &pagetypeinfo_op); + proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op); proc_create_seq("vmstat", 0444, NULL, &vmstat_op); proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op); #endif -- cgit v1.2.3 From 93b3a674485f6a4b8ffff85d1682d5e8b7c51560 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 5 Nov 2019 21:16:44 -0800 Subject: mm, vmstat: reduce zone->lock holding time by /proc/pagetypeinfo pagetypeinfo_showfree_print is called by zone->lock held in irq mode. This is not really nice because it blocks both any interrupts on that cpu and the page allocator. On large machines this might even trigger the hard lockup detector. Considering the pagetypeinfo is a debugging tool we do not really need exact numbers here. 
The primary reason to look at the output is to see how pageblocks are spread among different migratetypes and low number of pages is much more interesting therefore putting a bound on the number of pages on the free_list sounds like a reasonable tradeoff. The new output will simply tell [...] Node 6, zone Normal, type Movable >100000 >100000 >100000 >100000 41019 31560 23996 10054 3229 983 648 instead of Node 6, zone Normal, type Movable 399568 294127 221558 102119 41019 31560 23996 10054 3229 983 648 The limit has been chosen arbitrarily and it is a subject of a future change should there be a need for that. While we are at it, also drop the zone lock after each free_list iteration which will help with the IRQ and page allocator responsiveness even further as the IRQ lock held time is always bound to those 100k pages. [akpm@linux-foundation.org: tweak comment text, per David Hildenbrand] Link: http://lkml.kernel.org/r/20191025072610.18526-3-mhocko@kernel.org Signed-off-by: Michal Hocko Suggested-by: Andrew Morton Reviewed-by: Waiman Long Acked-by: Vlastimil Babka Acked-by: David Hildenbrand Acked-by: Rafael Aquini Acked-by: David Rientjes Reviewed-by: Andrew Morton Cc: Greg Kroah-Hartman Cc: Jann Horn Cc: Johannes Weiner Cc: Konstantin Khlebnikov Cc: Mel Gorman Cc: Roman Gushchin Cc: Song Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmstat.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 4e885ecd44d1..a8222041bd44 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1383,12 +1383,29 @@ static void pagetypeinfo_showfree_print(struct seq_file *m, unsigned long freecount = 0; struct free_area *area; struct list_head *curr; + bool overflow = false; area = &(zone->free_area[order]); - list_for_each(curr, &area->free_list[mtype]) - freecount++; - seq_printf(m, "%6lu ", freecount); + list_for_each(curr, &area->free_list[mtype]) { + /* + * Cap the free_list iteration because it 
might + * be really large and we are under a spinlock + * so a long time spent here could trigger a + * hard lockup detector. Anyway this is a + * debugging tool so knowing there is a handful + * of pages of this order should be more than + * sufficient. + */ + if (++freecount >= 100000) { + overflow = true; + break; + } + } + seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount); + spin_unlock_irq(&zone->lock); + cond_resched(); + spin_lock_irq(&zone->lock); } seq_putc(m, '\n'); } -- cgit v1.2.3 From ec649c9d454ea372dcf16cccf48250994f1d7788 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 5 Nov 2019 21:16:48 -0800 Subject: mm/khugepaged: fix might_sleep() warn with CONFIG_HIGHPTE=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I got some khugepaged spew on a 32bit x86: BUG: sleeping function called from invalid context at include/linux/mmu_notifier.h:346 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 25, name: khugepaged INFO: lockdep is turned off. CPU: 1 PID: 25 Comm: khugepaged Not tainted 5.4.0-rc5-elk+ #206 Hardware name: System manufacturer P5Q-EM/P5Q-EM, BIOS 2203 07/08/2009 Call Trace: dump_stack+0x66/0x8e ___might_sleep.cold.96+0x95/0xa6 __might_sleep+0x2e/0x80 collapse_huge_page.isra.51+0x5ac/0x1360 khugepaged+0x9a9/0x20f0 kthread+0xf5/0x110 ret_from_fork+0x2e/0x38 Looks like it's due to CONFIG_HIGHPTE=y pte_offset_map()->kmap_atomic() vs. mmu_notifier_invalidate_range_start(). Let's do the naive approach and just reorder the two operations. Link: http://lkml.kernel.org/r/20191029201513.GG1208@intel.com Fixes: 810e24e009cf71 ("mm/mmu_notifiers: annotate with might_sleep()") Signed-off-by: Ville Syrjälä Reviewed-by: Andrew Morton Acked-by: Kirill A. Shutemov Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. 
Peter Anvin" Cc: Jérôme Glisse Cc: Ralph Campbell Cc: Ira Weiny Cc: Jason Gunthorpe Cc: Daniel Vetter Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/khugepaged.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 0a1b4b484ac5..f05d27b7183d 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1028,12 +1028,13 @@ static void collapse_huge_page(struct mm_struct *mm, anon_vma_lock_write(vma->anon_vma); - pte = pte_offset_map(pmd, address); - pte_ptl = pte_lockptr(mm, pmd); - mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, address, address + HPAGE_PMD_SIZE); mmu_notifier_invalidate_range_start(&range); + + pte = pte_offset_map(pmd, address); + pte_ptl = pte_lockptr(mm, pmd); + pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */ /* * After this gup_fast can't run anymore. This also removes -- cgit v1.2.3 From 1be334e5c0886197cc82923ff0ac5836111b7b57 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 5 Nov 2019 21:16:51 -0800 Subject: mm/page_alloc.c: ratelimit allocation failure warnings more aggressively While investigating a bug related to higher atomic allocation failures, we noticed the failure warnings positively drowning the console, and in our case trigger lockup warnings because of a serial console too slow to handle all that output. But even if we had a faster console, it's unclear what additional information the current level of repetition provides. Allocation failures happen for three reasons: The machine is OOM, the VM is failing to handle reasonable requests, or somebody is making unreasonable requests (and didn't acknowledge their opportunism with __GFP_NOWARN). Having the memory dump, a callstack, and the ratelimit stats on skipped failure warnings should provide enough information to let users/admins/developers know whether something is wrong and point them in the right direction for debugging, bpftracing etc. 
Limit allocation failure warnings to one spew every ten seconds. Link: http://lkml.kernel.org/r/20191028194906.26899-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6c717ad5f5c5..f391c0c4ed1d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3728,10 +3728,6 @@ try_this_zone: static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask) { unsigned int filter = SHOW_MEM_FILTER_NODES; - static DEFINE_RATELIMIT_STATE(show_mem_rs, HZ, 1); - - if (!__ratelimit(&show_mem_rs)) - return; /* * This documents exceptions given to allocations in certain @@ -3752,8 +3748,7 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...) { struct va_format vaf; va_list args; - static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL, - DEFAULT_RATELIMIT_BURST); + static DEFINE_RATELIMIT_STATE(nopage_rs, 10*HZ, 1); if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs)) return; -- cgit v1.2.3 From a31631302abce4c80913d4dc741c4a6b07969b0e Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Tue, 5 Nov 2019 21:16:54 -0800 Subject: zswap: add Vitaly to the maintainers list Per conversation with Dan, add myself to the zswap MAINTAINERS list. 
Link: http://lkml.kernel.org/r/20191028143154.31304-1-vitaly.wool@konsulko.com Signed-off-by: Vitaly Wool Acked-by: Dan Streetman Acked-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index cba1095547fd..d75f59693cdd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18034,6 +18034,7 @@ F: Documentation/vm/zsmalloc.rst ZSWAP COMPRESSED SWAP CACHING M: Seth Jennings M: Dan Streetman +M: Vitaly Wool L: linux-mm@kvack.org S: Maintained F: mm/zswap.c -- cgit v1.2.3 From 5cbf2fff3bba8d3c6a4d47c1754de1cf57e2b01f Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 5 Nov 2019 21:16:57 -0800 Subject: dump_stack: avoid the livelock of the dump_lock In the current code, we use the atomic_cmpxchg() to serialize the output of the dump_stack(), but this implementation suffers the thundering herd problem. We have observed such kind of livelock on a Marvell cn96xx board(24 cpus) when heavily using the dump_stack() in a kprobe handler. Actually we can let the competitors to wait for the releasing of the lock before jumping to atomic_cmpxchg(). This will definitely mitigate the thundering herd problem. Thanks Linus for the suggestion. [akpm@linux-foundation.org: fix comment] Link: http://lkml.kernel.org/r/20191030031637.6025-1-haokexin@gmail.com Fixes: b58d977432c8 ("dump_stack: serialize the output from dump_stack()") Signed-off-by: Kevin Hao Suggested-by: Linus Torvalds Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dump_stack.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/dump_stack.c b/lib/dump_stack.c index 5cff72f18c4a..33ffbf308853 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -106,7 +106,12 @@ retry: was_locked = 1; } else { local_irq_restore(flags); - cpu_relax(); + /* + * Wait for the lock to release before jumping to + * atomic_cmpxchg() in order to mitigate the thundering herd + * problem. 
+ */ + do { cpu_relax(); } while (atomic_read(&dump_lock) != -1); goto retry; } -- cgit v1.2.3 From 6981b76cf6f3252ed7b828fc9b2d333ff5f0bde8 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 5 Nov 2019 21:17:00 -0800 Subject: MAINTAINERS: update information for "MEMORY MANAGEMENT" I was trying to find the mm tree in MAINTAINERS by searching "Morton". Unfortunately, I didn't find one. And I didn't even locate the MEMORY MANAGEMENT section quickly, because Andrew's name was not listed there. Thanks to Johannes who helped me find the mm tree. Let's save others' time searching around by adding: M: Andrew Morton T: git git://github.com/hnaz/linux-mm.git [akpm@linux-foundation.org: add ozlabs.org quilt trees] Link: http://lkml.kernel.org/r/20191030202217.3498133-1-songliubraving@fb.com Signed-off-by: Song Liu Acked-by: Andrew Morton Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index d75f59693cdd..2a427d1e9f01 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10519,8 +10519,12 @@ F: mm/memblock.c F: Documentation/core-api/boot-time-mm.rst MEMORY MANAGEMENT +M: Andrew Morton L: linux-mm@kvack.org W: http://www.linux-mm.org +T: quilt https://ozlabs.org/~akpm/mmotm/ +T: quilt https://ozlabs.org/~akpm/mmots/ +T: git git://github.com/hnaz/linux-mm.git S: Maintained F: include/linux/mm.h F: include/linux/gfp.h -- cgit v1.2.3 From 221ec5c0a46c1a1740f34fb36fc661a5284d01b0 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 5 Nov 2019 21:17:03 -0800 Subject: mm: slab: make page_cgroup_ino() to recognize non-compound slab pages properly page_cgroup_ino() doesn't return a valid memcg pointer for non-compound slab pages, because it depends on PgHead AND PgSlab flags to be set to determine the memory cgroup from the kmem_cache. It's correct for compound pages, but not for generic small pages. Those don't have PgHead set, so it ends up returning zero. 
Fix this by replacing the condition to PageSlab() && !PageTail(). Before this patch: [root@localhost ~]# ./page-types -c /sys/fs/cgroup/user.slice/user-0.slice/user@0.service/ | grep slab 0x0000000000000080 38 0 _______S___________________________________ slab After this patch: [root@localhost ~]# ./page-types -c /sys/fs/cgroup/user.slice/user-0.slice/user@0.service/ | grep slab 0x0000000000000080 147 0 _______S___________________________________ slab Also, hwpoison_filter_task() uses output of page_cgroup_ino() in order to filter error injection events based on memcg. So if page_cgroup_ino() fails to return memcg pointer, we just fail to inject memory error. Considering that hwpoison filter is for testing, affected users are limited and the impact should be marginal. [n-horiguchi@ah.jp.nec.com: changelog additions] Link: http://lkml.kernel.org/r/20191031012151.2722280-1-guro@fb.com Fixes: 4d96ba353075 ("mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages") Signed-off-by: Roman Gushchin Reviewed-by: Shakeel Butt Acked-by: David Rientjes Cc: Vladimir Davydov Cc: Daniel Jordan Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 +- mm/slab.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 0507b1cfd7e8..2655c07baada 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -484,7 +484,7 @@ ino_t page_cgroup_ino(struct page *page) unsigned long ino = 0; rcu_read_lock(); - if (PageHead(page) && PageSlab(page)) + if (PageSlab(page) && !PageTail(page)) memcg = memcg_from_slab_page(page); else memcg = READ_ONCE(page->mem_cgroup); diff --git a/mm/slab.h b/mm/slab.h index 68e455f2b698..b2b01694dc43 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -323,8 +323,8 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s) * Expects a pointer to a slab page. 
Please note, that PageSlab() check * isn't sufficient, as it returns true also for tail compound slab pages, * which do not have slab_cache pointer set. - * So this function assumes that the page can pass PageHead() and PageSlab() - * checks. + * So this function assumes that the page can pass PageSlab() && !PageTail() + * check. * * The kmem_cache can be reparented asynchronously. The caller must ensure * the memcg lifetime, e.g. by taking rcu_read_lock() or cgroup_mutex. -- cgit v1.2.3 From 8731acc5068eb3f422a45c760d32198175c756f8 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 5 Nov 2019 21:17:06 -0800 Subject: scripts/gdb: fix debugging modules compiled with hot/cold partitioning gcc's -freorder-blocks-and-partition option makes it group frequently and infrequently used code in .text.hot and .text.unlikely sections respectively. At least when building modules on s390, this option is used by default. gdb assumes that all code is located in .text section, and that .text section is located at module load address. With such modules this is no longer the case: there is code in .text.hot and .text.unlikely, and either of them might precede .text. Fix by explicitly telling gdb the addresses of code sections. It might be tempting to do this for all sections, not only the ones in the white list. Unfortunately, gdb appears to have an issue, when telling it about e.g. loadable .note.gnu.build-id section causes it to think that non-loadable .note.Linux section is loaded at address 0, which in turn causes NULL pointers to be resolved to bogus symbols. So keep using the white list approach for the time being. 
Link: http://lkml.kernel.org/r/20191028152734.13065-1-iii@linux.ibm.com Signed-off-by: Ilya Leoshkevich Reviewed-by: Jan Kiszka Cc: Kieran Bingham Cc: Heiko Carstens Cc: Vasily Gorbik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/gdb/linux/symbols.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py index 7b7c2fafbc68..be984aa29b75 100644 --- a/scripts/gdb/linux/symbols.py +++ b/scripts/gdb/linux/symbols.py @@ -99,7 +99,8 @@ lx-symbols command.""" attrs[n]['name'].string(): attrs[n]['address'] for n in range(int(sect_attrs['nsections']))} args = [] - for section_name in [".data", ".data..read_mostly", ".rodata", ".bss"]: + for section_name in [".data", ".data..read_mostly", ".rodata", ".bss", + ".text", ".text.hot", ".text.unlikely"]: address = section_name_to_address.get(section_name) if address: args.append(" -s {name} {addr}".format( -- cgit v1.2.3 From 656d571193262a11c2daa4012e53e4d645bbce56 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 5 Nov 2019 21:17:10 -0800 Subject: mm/memory_hotplug: fix updating the node span We recently started updating the node span based on the zone span to avoid touching uninitialized memmaps. Currently, we will always detect the node span to start at 0, meaning a node can easily span too many pages. pgdat_is_empty() will still work correctly if all zones span no pages. We should skip over all zones without spanned pages and properly handle the first detected zone that spans pages. Unfortunately, in contrast to the zone span (/proc/zoneinfo), the node span cannot easily be inspected and tested. The node span gives no real guarantees when an architecture supports memory hotplug, meaning it can easily contain holes or span pages of different nodes. The node span is not really used after init on architectures that support memory hotplug. 
E.g., we use it in mm/memory_hotplug.c:try_offline_node() and in mm/kmemleak.c:kmemleak_scan(). These users seem to be fine. Link: http://lkml.kernel.org/r/20191027222714.5313-1-david@redhat.com Fixes: 00d6c019b5bc ("mm/memory_hotplug: don't access uninitialized memmaps in shrink_pgdat_span()") Signed-off-by: David Hildenbrand Cc: Michal Hocko Cc: Oscar Salvador Cc: Stephen Rothwell Cc: Dan Williams Cc: Pavel Tatashin Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index df570e5c71cc..07e5c67f48a8 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -447,6 +447,14 @@ static void update_pgdat_span(struct pglist_data *pgdat) zone->spanned_pages; /* No need to lock the zones, they can't change. */ + if (!zone->spanned_pages) + continue; + if (!node_end_pfn) { + node_start_pfn = zone->zone_start_pfn; + node_end_pfn = zone_end_pfn; + continue; + } + if (zone_end_pfn > node_end_pfn) node_end_pfn = zone_end_pfn; if (zone->zone_start_pfn < node_start_pfn) -- cgit v1.2.3 From 869712fd3de5a90b7ba23ae1272278cddc66b37b Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 5 Nov 2019 21:17:13 -0800 Subject: mm: memcontrol: fix network errors from failing __GFP_ATOMIC charges While upgrading from 4.16 to 5.2, we noticed these allocation errors in the log of the new kernel: SLUB: Unable to allocate memory on node -1, gfp=0xa20(GFP_ATOMIC) cache: tw_sock_TCPv6(960:helper-logs), object size: 232, buffer size: 240, default order: 1, min order: 0 node 0: slabs: 5, objs: 170, free: 0 slab_out_of_memory+1 ___slab_alloc+969 __slab_alloc+14 kmem_cache_alloc+346 inet_twsk_alloc+60 tcp_time_wait+46 tcp_fin+206 tcp_data_queue+2034 tcp_rcv_state_process+784 tcp_v6_do_rcv+405 __release_sock+118 tcp_close+385 inet_release+46 __sock_release+55 sock_close+17 __fput+170 task_work_run+127 exit_to_usermode_loop+191 
do_syscall_64+212 entry_SYSCALL_64_after_hwframe+68 accompanied by an increase in machines going completely radio silent under memory pressure. One thing that changed since 4.16 is e699e2c6a654 ("net, mm: account sock objects to kmemcg"), which made these slab caches subject to cgroup memory accounting and control. The problem with that is that cgroups, unlike the page allocator, do not maintain dedicated atomic reserves. As a cgroup's usage hovers at its limit, atomic allocations - such as done during network rx - can fail consistently for extended periods of time. The kernel is not able to operate under these conditions. We don't want to revert the culprit patch, because it indeed tracks a potentially substantial amount of memory used by a cgroup. We also don't want to implement dedicated atomic reserves for cgroups. There is no point in keeping a fixed margin of unused bytes in the cgroup's memory budget to accommodate a consumer that is impossible to predict - we'd be wasting memory and get into configuration headaches, not unlike what we have going with min_free_kbytes. We do this for physical mem because we have to, but cgroups are an accounting game. Instead, account these privileged allocations to the cgroup, but let them bypass the configured limit if they have to. This way, we get the benefits of accounting the consumed memory and have it exert pressure on the rest of the cgroup, but like with the page allocator, we shift the burden of reclaiming on behalf of atomic allocations onto the regular allocations that can block.
Link: http://lkml.kernel.org/r/20191022233708.365764-1-hannes@cmpxchg.org Fixes: e699e2c6a654 ("net, mm: account sock objects to kmemcg") Signed-off-by: Johannes Weiner Reviewed-by: Shakeel Butt Cc: Suleiman Souhlal Cc: Michal Hocko Cc: [4.18+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2655c07baada..37592dd7ae32 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2534,6 +2534,15 @@ retry: goto retry; } + /* + * Memcg doesn't have a dedicated reserve for atomic + * allocations. But like the global atomic pool, we need to + * put the burden of reclaim on regular allocation requests + * and let these go through as privileged allocations. + */ + if (gfp_mask & __GFP_ATOMIC) + goto force; + /* * Unlike in global OOM situations, memcg is not in a physical * memory shortage. Allow dying and OOM-killed tasks to -- cgit v1.2.3 From e7af6307a8a54f0b873960b32b6a644f2d0fbd97 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Nov 2019 17:55:47 +0100 Subject: ALSA: timer: Fix incorrectly assigned timer instance The clean up commit 41672c0c24a6 ("ALSA: timer: Simplify error path in snd_timer_open()") unified the error handling code paths with the standard goto, but it introduced a subtle bug: the timer instance is stored in snd_timer_open() incorrectly even if it returns an error. This may eventually lead to UAF, as spotted by fuzzer. The culprit is the snd_timer_open() code checks the SNDRV_TIMER_IFLG_EXCLUSIVE flag with the common variable timeri. This variable is supposed to be the newly created instance, but we (ab-)used it for a temporary check before the actual creation of a timer instance. After that point, there is another check for the max number of instances, and it bails out if over the threshold. Before the refactoring above, it worked fine because the code returned directly from that point. 
After the refactoring, however, it jumps to the unified error path that stores the timeri variable in return -- even if it returns an error. Unfortunately this stored value is kept in the caller side (snd_timer_user_tselect()) in tu->timeri. This causes inconsistency later, as if the timer was successfully assigned. In this patch, we fix it by not re-using timeri variable but a temporary variable for testing the exclusive connection, so timeri remains NULL at that point. Fixes: 41672c0c24a6 ("ALSA: timer: Simplify error path in snd_timer_open()") Reported-and-tested-by: Tristan Madani Cc: Link: https://lore.kernel.org/r/20191106165547.23518-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/timer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 6b724d2ee2de..59ae21b0bb93 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -284,11 +284,11 @@ int snd_timer_open(struct snd_timer_instance **ti, goto unlock; } if (!list_empty(&timer->open_list_head)) { - timeri = list_entry(timer->open_list_head.next, + struct snd_timer_instance *t = + list_entry(timer->open_list_head.next, struct snd_timer_instance, open_list); - if (timeri->flags & SNDRV_TIMER_IFLG_EXCLUSIVE) { + if (t->flags & SNDRV_TIMER_IFLG_EXCLUSIVE) { err = -EBUSY; - timeri = NULL; goto unlock; } } -- cgit v1.2.3 From e2f238f7d5a1fa69ff1884d37acf9a2a3a01b308 Mon Sep 17 00:00:00 2001 From: Honggang Li Date: Mon, 4 Nov 2019 20:43:22 +0800 Subject: configfs: calculate the depth of parent item When create symbolic link, create_link should calculate the depth of the parent item. However, both the first and second parameters of configfs_get_target_path had been set to the target. Broken symbolic link created. $ targetcli ls / o- / ............................................................. [...] o- backstores .................................................. [...] | o- block ...................................... 
[Storage Objects: 0] | o- fileio ..................................... [Storage Objects: 2] | | o- vdev0 .......... [/dev/ramdisk1 (16.0MiB) write-thru activated] | | | o- alua ....................................... [ALUA Groups: 1] | | | o- default_tg_pt_gp ........... [ALUA state: Active/optimized] | | o- vdev1 .......... [/dev/ramdisk2 (16.0MiB) write-thru activated] | | o- alua ....................................... [ALUA Groups: 1] | | o- default_tg_pt_gp ........... [ALUA state: Active/optimized] | o- pscsi ...................................... [Storage Objects: 0] | o- ramdisk .................................... [Storage Objects: 0] o- iscsi ................................................ [Targets: 0] o- loopback ............................................. [Targets: 0] o- srpt ................................................. [Targets: 2] | o- ib.e89a8f91cb3200000000000000000000 ............... [no-gen-acls] | | o- acls ................................................ [ACLs: 2] | | | o- ib.e89a8f91cb3200000000000000000000 ........ [Mapped LUNs: 2] | | | | o- mapped_lun0 ............................. [BROKEN LUN LINK] | | | | o- mapped_lun1 ............................. [BROKEN LUN LINK] | | | o- ib.e89a8f91cb3300000000000000000000 ........ [Mapped LUNs: 2] | | | o- mapped_lun0 ............................. [BROKEN LUN LINK] | | | o- mapped_lun1 ............................. [BROKEN LUN LINK] | | o- luns ................................................ [LUNs: 2] | | o- lun0 ...... [fileio/vdev0 (/dev/ramdisk1) (default_tg_pt_gp)] | | o- lun1 ...... [fileio/vdev1 (/dev/ramdisk2) (default_tg_pt_gp)] | o- ib.e89a8f91cb3300000000000000000000 ............... [no-gen-acls] | o- acls ................................................ [ACLs: 0] | o- luns ................................................ [LUNs: 0] o- vhost ................................................ 
[Targets: 0] Fixes: e9c03af21cc7 ("configfs: calculate the symlink target only once") Signed-off-by: Honggang Li Signed-off-by: Christoph Hellwig --- fs/configfs/symlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index dc5dbf6a81d7..cb61467478ca 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -101,7 +101,7 @@ static int create_link(struct config_item *parent_item, } target_sd->s_links++; spin_unlock(&configfs_dirent_lock); - ret = configfs_get_target_path(item, item, body); + ret = configfs_get_target_path(parent_item, item, body); if (!ret) ret = configfs_create_link(target_sd, parent_item->ci_dentry, dentry, body); -- cgit v1.2.3 From 86de88cfeb7cf33c7bbd18360e041c7d4e651bba Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 4 Nov 2019 09:37:36 -0800 Subject: drm/atomic: fix self-refresh helpers crtc state dereference drm_self_refresh_helper_update_avg_times() was incorrectly accessing the new incoming state after drm_atomic_helper_commit_hw_done(). But this state might have already been superseded by an !nonblock atomic update resulting in dereferencing an already freed crtc_state. TODO I *think* this will more or less do the right thing.. although I'm not 100% sure if, for example, we enter psr in a nonblock commit, and then leave psr in a !nonblock commit that overtakes the completion of the nonblock commit. Not sure if this sort of scenario can happen in practice. But not crashing is better than crashing, so I guess we should either take this patch or revert the self-refresh helpers until Sean can figure out a better solution.
Fixes: d4da4e33341c ("drm: Measure Self Refresh Entry/Exit times to avoid thrashing") Cc: Sean Paul Signed-off-by: Rob Clark [seanpaul fixed up some checkpatch warns] Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20191104173737.142558-1-robdclark@gmail.com --- drivers/gpu/drm/drm_atomic_helper.c | 15 ++++++++++++++- drivers/gpu/drm/drm_self_refresh_helper.c | 18 +++++++++++------- include/drm/drm_self_refresh_helper.h | 3 ++- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 3ef2ac52ce94..2dd2cd87cdbb 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1581,8 +1581,11 @@ static void commit_tail(struct drm_atomic_state *old_state) { struct drm_device *dev = old_state->dev; const struct drm_mode_config_helper_funcs *funcs; + struct drm_crtc_state *new_crtc_state; + struct drm_crtc *crtc; ktime_t start; s64 commit_time_ms; + unsigned int i, new_self_refresh_mask = 0; funcs = dev->mode_config.helper_private; @@ -1602,6 +1605,15 @@ static void commit_tail(struct drm_atomic_state *old_state) drm_atomic_helper_wait_for_dependencies(old_state); + /* + * We cannot safely access new_crtc_state after + * drm_atomic_helper_commit_hw_done() so figure out which crtc's have + * self-refresh active beforehand: + */ + for_each_new_crtc_in_state(old_state, crtc, new_crtc_state, i) + if (new_crtc_state->self_refresh_active) + new_self_refresh_mask |= BIT(i); + if (funcs && funcs->atomic_commit_tail) funcs->atomic_commit_tail(old_state); else @@ -1610,7 +1622,8 @@ static void commit_tail(struct drm_atomic_state *old_state) commit_time_ms = ktime_ms_delta(ktime_get(), start); if (commit_time_ms > 0) drm_self_refresh_helper_update_avg_times(old_state, - (unsigned long)commit_time_ms); + (unsigned long)commit_time_ms, + new_self_refresh_mask); drm_atomic_helper_commit_cleanup_done(old_state); diff --git 
a/drivers/gpu/drm/drm_self_refresh_helper.c b/drivers/gpu/drm/drm_self_refresh_helper.c index 68f4765a5896..dd33fec5aabd 100644 --- a/drivers/gpu/drm/drm_self_refresh_helper.c +++ b/drivers/gpu/drm/drm_self_refresh_helper.c @@ -133,29 +133,33 @@ out_drop_locks: * drm_self_refresh_helper_update_avg_times - Updates a crtc's SR time averages * @state: the state which has just been applied to hardware * @commit_time_ms: the amount of time in ms that this commit took to complete + * @new_self_refresh_mask: bitmask of crtc's that have self_refresh_active in + * new state * * Called after &drm_mode_config_funcs.atomic_commit_tail, this function will * update the average entry/exit self refresh times on self refresh transitions. * These averages will be used when calculating how long to delay before * entering self refresh mode after activity. */ -void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state, - unsigned int commit_time_ms) +void +drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state, + unsigned int commit_time_ms, + unsigned int new_self_refresh_mask) { struct drm_crtc *crtc; - struct drm_crtc_state *old_crtc_state, *new_crtc_state; + struct drm_crtc_state *old_crtc_state; int i; - for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, - new_crtc_state, i) { + for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) { + bool new_self_refresh_active = new_self_refresh_mask & BIT(i); struct drm_self_refresh_data *sr_data = crtc->self_refresh_data; struct ewma_psr_time *time; if (old_crtc_state->self_refresh_active == - new_crtc_state->self_refresh_active) + new_self_refresh_active) continue; - if (new_crtc_state->self_refresh_active) + if (new_self_refresh_active) time = &sr_data->entry_avg_ms; else time = &sr_data->exit_avg_ms; diff --git a/include/drm/drm_self_refresh_helper.h b/include/drm/drm_self_refresh_helper.h index 5b79d253fb46..520235c20708 100644 --- a/include/drm/drm_self_refresh_helper.h +++ 
b/include/drm/drm_self_refresh_helper.h @@ -13,7 +13,8 @@ struct drm_crtc; void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state); void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state, - unsigned int commit_time_ms); + unsigned int commit_time_ms, + unsigned int new_self_refresh_mask); int drm_self_refresh_helper_init(struct drm_crtc *crtc); void drm_self_refresh_helper_cleanup(struct drm_crtc *crtc); -- cgit v1.2.3 From 3a55402c93877d291b0a612d25edb03d1b4b93ac Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Tue, 5 Nov 2019 11:07:24 -0800 Subject: net: bcmgenet: use RGMII loopback for MAC reset As noted in commit 28c2d1a7a0bf ("net: bcmgenet: enable loopback during UniMAC sw_reset") the UniMAC must be clocked while sw_reset is asserted for its state machines to reset cleanly. The transmit and receive clocks used by the UniMAC are derived from the signals used on its PHY interface. The bcmgenet MAC can be configured to work with different PHY interfaces including MII, GMII, RGMII, and Reverse MII on internal and external interfaces. Unfortunately for the UniMAC, when configured for MII the Tx clock is always driven from the PHY which places it outside of the direct control of the MAC. The earlier commit enabled a local loopback mode within the UniMAC so that the receive clock would be derived from the transmit clock which addressed the observed issue with an external GPHY disabling its Rx clock. However, when a Tx clock is not available this loopback is insufficient. This commit implements a workaround that leverages the fact that the MAC can reliably generate all of its necessary clocking by entering the external GPHY RGMII interface mode with the UniMAC in local loopback during the sw_reset interval. Unfortunately, this has the undesirable side effect of the RGMII GTXCLK signal being driven during the same window.
In most configurations this is a benign side effect as the signal is either not routed to a pin or is already expected to drive the pin. The one exception is when an external MII PHY is expected to drive the same pin with its TX_CLK output creating output driver contention. This commit exploits the IEEE 802.3 clause 22 standard defined isolate mode to force an external MII PHY to present a high impedance on its TX_CLK output during the window to prevent any contention at the pin. The MII interface is used internally with the 40nm internal EPHY which aggressively disables its clocks for power savings leading to incomplete resets of the UniMAC and many instabilities observed over the years. The workaround of this commit is expected to put an end to those problems. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 -- drivers/net/ethernet/broadcom/genet/bcmmii.c | 33 ++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 0f138280315a..a1776ed8d7a1 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1996,8 +1996,6 @@ static void reset_umac(struct bcmgenet_priv *priv) /* issue soft reset with (rg)mii loopback to ensure a stable rxclk */ bcmgenet_umac_writel(priv, CMD_SW_RESET | CMD_LCL_LOOP_EN, UMAC_CMD); - udelay(2); - bcmgenet_umac_writel(priv, 0, UMAC_CMD); } static void bcmgenet_intr_disable(struct bcmgenet_priv *priv) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 17bb8d60a157..fcd181ae3a7d 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -221,8 +221,38 @@ int bcmgenet_mii_config(struct net_device
*dev, bool init) const char *phy_name = NULL; u32 id_mode_dis = 0; u32 port_ctrl; + int bmcr = -1; + int ret; u32 reg; + /* MAC clocking workaround during reset of umac state machines */ + reg = bcmgenet_umac_readl(priv, UMAC_CMD); + if (reg & CMD_SW_RESET) { + /* An MII PHY must be isolated to prevent TXC contention */ + if (priv->phy_interface == PHY_INTERFACE_MODE_MII) { + ret = phy_read(phydev, MII_BMCR); + if (ret >= 0) { + bmcr = ret; + ret = phy_write(phydev, MII_BMCR, + bmcr | BMCR_ISOLATE); + } + if (ret) { + netdev_err(dev, "failed to isolate PHY\n"); + return ret; + } + } + /* Switch MAC clocking to RGMII generated clock */ + bcmgenet_sys_writel(priv, PORT_MODE_EXT_GPHY, SYS_PORT_CTRL); + /* Ensure 5 clks with Rx disabled + * followed by 5 clks with Reset asserted + */ + udelay(4); + reg &= ~(CMD_SW_RESET | CMD_LCL_LOOP_EN); + bcmgenet_umac_writel(priv, reg, UMAC_CMD); + /* Ensure 5 more clocks before Rx is enabled */ + udelay(2); + } + priv->ext_phy = !priv->internal_phy && (priv->phy_interface != PHY_INTERFACE_MODE_MOCA); @@ -254,6 +284,9 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) phy_set_max_speed(phydev, SPEED_100); bcmgenet_sys_writel(priv, PORT_MODE_EXT_EPHY, SYS_PORT_CTRL); + /* Restore the MII PHY after isolation */ + if (bmcr >= 0) + phy_write(phydev, MII_BMCR, bmcr); break; case PHY_INTERFACE_MODE_REVMII: -- cgit v1.2.3 From 6b6d017fccb4693767d2fcae9ef2fd05243748bb Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Tue, 5 Nov 2019 11:07:25 -0800 Subject: Revert "net: bcmgenet: soft reset 40nm EPHYs before MAC init" This reverts commit 1f515486275a08a17a2c806b844cca18f7de5b34. This commit improved the chances of the umac resetting cleanly by ensuring that the PHY was restored to its normal operation prior to resetting the umac. However, there were still cases when the PHY might not be driving a Tx clock to the umac during this window (e.g. when the PHY detects no link). 
The previous commit now ensures that the unimac receives clocks from the MAC during its reset window so this commit is no longer needed. This commit also has an unintended negative impact on the MDIO performance of the UniMAC MDIO interface because it is used before the MDIO interrupts are reenabled, so it should be removed. Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 28 +++---- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 2 +- drivers/net/ethernet/broadcom/genet/bcmmii.c | 112 ++++++++++++++----------- 3 files changed, 73 insertions(+), 69 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index a1776ed8d7a1..b5255dd08265 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2877,12 +2877,6 @@ static int bcmgenet_open(struct net_device *dev) if (priv->internal_phy) bcmgenet_power_up(priv, GENET_POWER_PASSIVE); - ret = bcmgenet_mii_connect(dev); - if (ret) { - netdev_err(dev, "failed to connect to PHY\n"); - goto err_clk_disable; - } - /* take MAC out of reset */ bcmgenet_umac_reset(priv); @@ -2892,12 +2886,6 @@ static int bcmgenet_open(struct net_device *dev) reg = bcmgenet_umac_readl(priv, UMAC_CMD); priv->crc_fwd_en = !!(reg & CMD_CRC_FWD); - ret = bcmgenet_mii_config(dev, true); - if (ret) { - netdev_err(dev, "unsupported PHY\n"); - goto err_disconnect_phy; - } - bcmgenet_set_hw_addr(priv, dev->dev_addr); if (priv->internal_phy) { @@ -2913,7 +2901,7 @@ static int bcmgenet_open(struct net_device *dev) ret = bcmgenet_init_dma(priv); if (ret) { netdev_err(dev, "failed to initialize DMA\n"); - goto err_disconnect_phy; + goto err_clk_disable; } /* Always enable ring 16 - descriptor ring */ @@ -2936,19 +2924,25 @@ static int bcmgenet_open(struct net_device *dev) goto err_irq0; } + ret = bcmgenet_mii_probe(dev); + if (ret) { + 
netdev_err(dev, "failed to connect to PHY\n"); + goto err_irq1; + } + bcmgenet_netif_start(dev); netif_tx_start_all_queues(dev); return 0; +err_irq1: + free_irq(priv->irq1, priv); err_irq0: free_irq(priv->irq0, priv); err_fini_dma: bcmgenet_dma_teardown(priv); bcmgenet_fini_dma(priv); -err_disconnect_phy: - phy_disconnect(dev->phydev); err_clk_disable: if (priv->internal_phy) bcmgenet_power_down(priv, GENET_POWER_PASSIVE); @@ -3629,8 +3623,6 @@ static int bcmgenet_resume(struct device *d) if (priv->internal_phy) bcmgenet_power_up(priv, GENET_POWER_PASSIVE); - phy_init_hw(dev->phydev); - bcmgenet_umac_reset(priv); init_umac(priv); @@ -3639,6 +3631,8 @@ static int bcmgenet_resume(struct device *d) if (priv->wolopts) clk_disable_unprepare(priv->clk_wol); + phy_init_hw(dev->phydev); + /* Speed settings must be restored */ bcmgenet_mii_config(priv->dev, false); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 7fbf573d8d52..dbc69d8fa05f 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -720,8 +720,8 @@ GENET_IO_MACRO(rbuf, GENET_RBUF_OFF); /* MDIO routines */ int bcmgenet_mii_init(struct net_device *dev); -int bcmgenet_mii_connect(struct net_device *dev); int bcmgenet_mii_config(struct net_device *dev, bool init); +int bcmgenet_mii_probe(struct net_device *dev); void bcmgenet_mii_exit(struct net_device *dev); void bcmgenet_phy_power_set(struct net_device *dev, bool enable); void bcmgenet_mii_setup(struct net_device *dev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index fcd181ae3a7d..dbe18cdf6c1b 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -173,46 +173,6 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) bcmgenet_fixed_phy_link_update); } -int bcmgenet_mii_connect(struct net_device *dev) -{ - 
struct bcmgenet_priv *priv = netdev_priv(dev); - struct device_node *dn = priv->pdev->dev.of_node; - struct phy_device *phydev; - u32 phy_flags = 0; - int ret; - - /* Communicate the integrated PHY revision */ - if (priv->internal_phy) - phy_flags = priv->gphy_rev; - - /* Initialize link state variables that bcmgenet_mii_setup() uses */ - priv->old_link = -1; - priv->old_speed = -1; - priv->old_duplex = -1; - priv->old_pause = -1; - - if (dn) { - phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, - phy_flags, priv->phy_interface); - if (!phydev) { - pr_err("could not attach to PHY\n"); - return -ENODEV; - } - } else { - phydev = dev->phydev; - phydev->dev_flags = phy_flags; - - ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, - priv->phy_interface); - if (ret) { - pr_err("could not attach to PHY\n"); - return -ENODEV; - } - } - - return 0; -} - int bcmgenet_mii_config(struct net_device *dev, bool init) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -339,21 +299,71 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); } - if (init) { - linkmode_copy(phydev->advertising, phydev->supported); + if (init) + dev_info(kdev, "configuring instance for %s\n", phy_name); - /* The internal PHY has its link interrupts routed to the - * Ethernet MAC ISRs. On GENETv5 there is a hardware issue - * that prevents the signaling of link UP interrupts when - * the link operates at 10Mbps, so fallback to polling for - * those versions of GENET. 
- */ - if (priv->internal_phy && !GENET_IS_V5(priv)) - phydev->irq = PHY_IGNORE_INTERRUPT; + return 0; +} - dev_info(kdev, "configuring instance for %s\n", phy_name); +int bcmgenet_mii_probe(struct net_device *dev) +{ + struct bcmgenet_priv *priv = netdev_priv(dev); + struct device_node *dn = priv->pdev->dev.of_node; + struct phy_device *phydev; + u32 phy_flags = 0; + int ret; + + /* Communicate the integrated PHY revision */ + if (priv->internal_phy) + phy_flags = priv->gphy_rev; + + /* Initialize link state variables that bcmgenet_mii_setup() uses */ + priv->old_link = -1; + priv->old_speed = -1; + priv->old_duplex = -1; + priv->old_pause = -1; + + if (dn) { + phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, + phy_flags, priv->phy_interface); + if (!phydev) { + pr_err("could not attach to PHY\n"); + return -ENODEV; + } + } else { + phydev = dev->phydev; + phydev->dev_flags = phy_flags; + + ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, + priv->phy_interface); + if (ret) { + pr_err("could not attach to PHY\n"); + return -ENODEV; + } + } + + /* Configure port multiplexer based on what the probed PHY device since + * reading the 'max-speed' property determines the maximum supported + * PHY speed which is needed for bcmgenet_mii_config() to configure + * things appropriately. + */ + ret = bcmgenet_mii_config(dev, true); + if (ret) { + phy_disconnect(dev->phydev); + return ret; } + linkmode_copy(phydev->advertising, phydev->supported); + + /* The internal PHY has its link interrupts routed to the + * Ethernet MAC ISRs. On GENETv5 there is a hardware issue + * that prevents the signaling of link UP interrupts when + * the link operates at 10Mbps, so fallback to polling for + * those versions of GENET. 
+ */ + if (priv->internal_phy && !GENET_IS_V5(priv)) + dev->phydev->irq = PHY_IGNORE_INTERRUPT; + return 0; } -- cgit v1.2.3 From 0686bd9d5e6863f60e4bb1e78e6fe7bb217a0890 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Tue, 5 Nov 2019 11:07:26 -0800 Subject: net: bcmgenet: reapply manual settings to the PHY The phy_init_hw() function may reset the PHY to a configuration that does not match manual network settings stored in the phydev structure. If the phy state machine is polled rather than event driven this can create a timing hazard where the phy state machine might alter the settings stored in the phydev structure from the value read from the BMCR. This commit follows invocations of phy_init_hw() by the bcmgenet driver with invocations of the genphy_config_aneg() function to ensure that the BMCR is written to match the settings held in the phydev structure. This prevents the risk of manual settings being accidentally altered. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index b5255dd08265..1de51811fcb4 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2612,8 +2612,10 @@ static void bcmgenet_irq_task(struct work_struct *work) spin_unlock_irq(&priv->lock); if (status & UMAC_IRQ_PHY_DET_R && - priv->dev->phydev->autoneg != AUTONEG_ENABLE) + priv->dev->phydev->autoneg != AUTONEG_ENABLE) { phy_init_hw(priv->dev->phydev); + genphy_config_aneg(priv->dev->phydev); + } /* Link UP/DOWN event */ if (status & UMAC_IRQ_LINK_EVENT) @@ -3634,6 +3636,7 @@ static int bcmgenet_resume(struct device *d) phy_init_hw(dev->phydev); /* Speed settings must be restored */ + genphy_config_aneg(dev->phydev); bcmgenet_mii_config(priv->dev, false); bcmgenet_set_hw_addr(priv, dev->dev_addr); -- cgit v1.2.3 From 6767df245f4736d0cf0c6fb7cf9cf94b27414245 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 6 Nov 2019 15:41:05 +0000 Subject: arm64: Do not mask out PTE_RDONLY in pte_same() Following commit 73e86cb03cf2 ("arm64: Move PTE_RDONLY bit handling out of set_pte_at()"), the PTE_RDONLY bit is no longer managed by set_pte_at() but built into the PAGE_* attribute definitions. Consequently, pte_same() must include this bit when checking two PTEs for equality. 
Remove the arm64-specific pte_same() function, practically reverting commit 747a70e60b72 ("arm64: Fix copy-on-write referencing in HugeTLB") Fixes: 73e86cb03cf2 ("arm64: Move PTE_RDONLY bit handling out of set_pte_at()") Cc: # 4.14.x- Cc: Will Deacon Cc: Steve Capper Reported-by: John Stultz Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 8330810f699e..565aa45ef134 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -283,23 +283,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, set_pte(ptep, pte); } -#define __HAVE_ARCH_PTE_SAME -static inline int pte_same(pte_t pte_a, pte_t pte_b) -{ - pteval_t lhs, rhs; - - lhs = pte_val(pte_a); - rhs = pte_val(pte_b); - - if (pte_present(pte_a)) - lhs &= ~PTE_RDONLY; - - if (pte_present(pte_b)) - rhs &= ~PTE_RDONLY; - - return (lhs == rhs); -} - /* * Huge pte definitions. */ -- cgit v1.2.3 From 576daab3cd029558b58c264694d84ff159572f9b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Oct 2019 13:29:52 -0400 Subject: drm/amdgpu/arcturus: properly set BANK_SELECT and FRAGMENT_SIZE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These were not aligned for optimal performance for GPUVM. 
Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 0cf7ef44b4b5..9ed178fa241c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -219,6 +219,15 @@ static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid) hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3, hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); -- cgit v1.2.3 From f2efc6e60089c99c342a6b7da47f1037e06c4296 Mon Sep 17 00:00:00 2001 From: Shirish S Date: Wed, 30 Oct 2019 14:20:46 +0530 Subject: drm/amdgpu: dont schedule jobs while in reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] doing kthread_park()/unpark() from drm_sched_entity_fini while GPU reset is in progress defeats all the purpose of drm_sched_stop->kthread_park. If drm_sched_entity_fini->kthread_unpark() happens AFTER drm_sched_stop->kthread_park nothing prevents from another (third) thread to keep submitting job to HW which will be picked up by the unparked scheduler thread and try to submit to HW but fail because the HW ring is deactivated. 
[How] grab the reset lock before calling drm_sched_entity_fini() Signed-off-by: Shirish S Suggested-by: Christian König Reviewed-by: Christian König Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 6614d8a6f4c8..2cdaf3b2a721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -604,8 +604,11 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) continue; } - for (i = 0; i < num_entities; i++) + for (i = 0; i < num_entities; i++) { + mutex_lock(&ctx->adev->lock_reset); drm_sched_entity_fini(&ctx->entities[0][i].entity); + mutex_unlock(&ctx->adev->lock_reset); + } } } -- cgit v1.2.3 From f9686ceedc0a04a3309e02195184c1779f602699 Mon Sep 17 00:00:00 2001 From: Zhan Liu Date: Fri, 1 Nov 2019 21:10:17 -0400 Subject: drm/amd/display: Add ENGINE_ID_DIGD condition check for Navi14 [Why] Navi10 has 6 PHY, but Navi14 only has 5 PHY, that is because there is no ENGINE_ID_DIGD in Navi14. Without this patch, many HDMI related issues (e.g. HDMI S3 resume failure, HDMI pink screen on boot) will be observed. [How] If "eng_id" is larger than ENGINE_ID_DIGD, then add "eng_id" by 1. 
Signed-off-by: Zhan Liu Reviewed-by: Hersen Wu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index dfb208285a9c..6b2f2f1a1c9c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1107,6 +1107,11 @@ struct stream_encoder *dcn20_stream_encoder_create( if (!enc1) return NULL; + if (ASICREV_IS_NAVI14_M(ctx->asic_id.hw_internal_rev)) { + if (eng_id >= ENGINE_ID_DIGD) + eng_id++; + } + dcn20_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, &stream_enc_regs[eng_id], &se_shift, &se_mask); -- cgit v1.2.3 From a85a64d39a26704800e602f8487a27cbc5257d6c Mon Sep 17 00:00:00 2001 From: Zhan Liu Date: Mon, 4 Nov 2019 15:46:56 -0400 Subject: Revert "drm/amd/display: setting the DIG_MODE to the correct value." This reverts commit 385857adb8154563840e5b0f200254126618f464. Reason for revert: Root cause of this issue is found. The workaround is not needed anymore. Signed-off-by: Zhan Liu Reviewed-by: Hersen Wu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 9c58670d5414..ca20b150afcc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2767,15 +2767,6 @@ void core_link_enable_stream( CONTROLLER_DP_TEST_PATTERN_VIDEOMODE, COLOR_DEPTH_UNDEFINED); - /* This second call is needed to reconfigure the DIG - * as a workaround for the incorrect value being applied - * from transmitter control. 
- */ - if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) - stream->link->link_enc->funcs->setup( - stream->link->link_enc, - pipe_ctx->stream->signal); - #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT if (pipe_ctx->stream->timing.flags.DSC) { if (dc_is_dp_signal(pipe_ctx->stream->signal) || -- cgit v1.2.3 From 5e200fb97a765a4e70033139a5db1c816f67f288 Mon Sep 17 00:00:00 2001 From: "Tianci.Yin" Date: Wed, 6 Nov 2019 14:29:35 -0500 Subject: drm/amdgpu: add navi14 PCI ID Add the navi14 PCI device id. Reviewed-by: Hawking Zhang Signed-off-by: Tianci.Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 2a00a36106b2..e1c15721611a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1016,6 +1016,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, /* Renoir */ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, -- cgit v1.2.3 From ff479731c3859609530416a18ddb3db5db019b66 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Wed, 6 Nov 2019 11:59:46 -0800 Subject: HID: wacom: generic: Treat serial number and related fields as unsigned The HID descriptors for most Wacom devices oddly declare the serial number and other related fields as signed integers. When these numbers are ingested by the HID subsystem, they are automatically sign-extended into 32-bit integers. We treat the fields as unsigned elsewhere in the kernel and userspace, however, so this sign-extension causes problems. 
In particular, the sign-extended tool ID sent to userspace as ABS_MISC does not properly match unsigned IDs used by xf86-input-wacom and libwacom. We introduce a function 'wacom_s32tou' that can undo the automatic sign extension performed by 'hid_snto32'. We call this function when processing the serial number and related fields to ensure that we are dealing with and reporting the unsigned form. We opt to use this method rather than adding a descriptor fixup in 'wacom_hid_usage_quirk' since it should be more robust in the face of future devices. Ref: https://github.com/linuxwacom/input-wacom/issues/134 Fixes: f85c9dc678 ("HID: wacom: generic: Support tool ID and additional tool types") CC: # v4.10+ Signed-off-by: Jason Gerecke Reviewed-by: Aaron Armstrong Skomra Signed-off-by: Jiri Kosina --- drivers/hid/wacom.h | 15 +++++++++++++++ drivers/hid/wacom_wac.c | 10 ++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/hid/wacom.h b/drivers/hid/wacom.h index 4a7f8d363220..203d27d198b8 100644 --- a/drivers/hid/wacom.h +++ b/drivers/hid/wacom.h @@ -202,6 +202,21 @@ static inline void wacom_schedule_work(struct wacom_wac *wacom_wac, } } +/* + * Convert a signed 32-bit integer to an unsigned n-bit integer. Undoes + * the normally-helpful work of 'hid_snto32' for fields that use signed + * ranges for questionable reasons. + */ +static inline __u32 wacom_s32tou(s32 value, __u8 n) +{ + switch (n) { + case 8: return ((__u8)value); + case 16: return ((__u16)value); + case 32: return ((__u32)value); + } + return value & (1 << (n - 1)) ? 
value & (~(~0U << n)) : value; +} + extern const struct hid_device_id wacom_ids[]; void wacom_wac_irq(struct wacom_wac *wacom_wac, size_t len); diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 2b0a5b8ca6e6..ccb74529bc78 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2303,7 +2303,7 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field case HID_DG_TOOLSERIALNUMBER: if (value) { wacom_wac->serial[0] = (wacom_wac->serial[0] & ~0xFFFFFFFFULL); - wacom_wac->serial[0] |= (__u32)value; + wacom_wac->serial[0] |= wacom_s32tou(value, field->report_size); } return; case HID_DG_TWIST: @@ -2319,15 +2319,17 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field return; case WACOM_HID_WD_SERIALHI: if (value) { + __u32 raw_value = wacom_s32tou(value, field->report_size); + wacom_wac->serial[0] = (wacom_wac->serial[0] & 0xFFFFFFFF); - wacom_wac->serial[0] |= ((__u64)value) << 32; + wacom_wac->serial[0] |= ((__u64)raw_value) << 32; /* * Non-USI EMR devices may contain additional tool type * information here. See WACOM_HID_WD_TOOLTYPE case for * more details. */ if (value >> 20 == 1) { - wacom_wac->id[0] |= value & 0xFFFFF; + wacom_wac->id[0] |= raw_value & 0xFFFFF; } } return; @@ -2339,7 +2341,7 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field * bitwise OR so the complete value can be built * up over time :( */ - wacom_wac->id[0] |= value; + wacom_wac->id[0] |= wacom_s32tou(value, field->report_size); return; case WACOM_HID_WD_OFFSETLEFT: if (features->offset_left && value != features->offset_left) -- cgit v1.2.3 From f382b0df6946d48fae80a2201ccff43b41382099 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 28 Oct 2019 19:13:58 +0200 Subject: net/mlx5e: Fix eswitch debug print of max fdb flow The value is already the calculation so remove the log prefix. 
Fixes: e52c28024008 ("net/mlx5: E-Switch, Add chains and priorities") Signed-off-by: Roi Dayan Reviewed-by: Eli Britstein Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 369499e88fe8..9004a07e457a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1079,7 +1079,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) MLX5_CAP_GEN(dev, max_flow_counter_15_0); fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); - esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(2^%d))\n", + esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(%d))\n", MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), max_flow_counter, ESW_OFFLOADS_NUM_GROUPS, fdb_max); -- cgit v1.2.3 From 22f83150f03e119f61f4d00d6884639871ba6859 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Tue, 29 Oct 2019 13:28:17 +0200 Subject: net/mlx5: DR, Fix memory leak in modify action destroy The rewrite data was not freed.
Fixes: 9db810ed2d37 ("net/mlx5: DR, Expose steering action functionality") Signed-off-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index b74b7d0f6590..004c56c2fc0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -1577,6 +1577,7 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action) break; case DR_ACTION_TYP_MODIFY_HDR: mlx5dr_icm_free_chunk(action->rewrite.chunk); + kfree(action->rewrite.data); refcount_dec(&action->rewrite.dmn->refcount); break; default: -- cgit v1.2.3 From 260986fcff81a0c147a5d169f1ad52aab95804d5 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Tue, 29 Oct 2019 18:18:10 +0200 Subject: net/mlx5: DR, Fix memory leak during rule creation During rule creation hw_ste_arr was not freed. 
Fixes: 41d07074154c ("net/mlx5: DR, Expose steering rule functionality") Signed-off-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index e8b656075c6f..5dcb8baf491a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -1096,6 +1096,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, if (htbl) mlx5dr_htbl_put(htbl); + kfree(hw_ste_arr); + return 0; free_ste: -- cgit v1.2.3 From 950d3af70ea89cf7ac51d734a634174013631192 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Tue, 29 Oct 2019 11:44:24 +0200 Subject: net/mlx5e: Use correct enum to determine uplink port For the vlan push action, if the eswitch flow source capability is enabled, the flow source value is compared with the MLX5_VPORT_UPLINK enum to determine the uplink port. This leads to a syndrome in dmesg when trying to add a vlan push action. For example: $ tc filter add dev vxlan0 ingress protocol ip prio 1 flower \ enc_dst_port 4789 \ action tunnel_key unset pipe \ action vlan push id 20 pipe \ action mirred egress redirect dev ens1f0_0 $ dmesg ... [ 2456.883693] mlx5_core 0000:82:00.0: mlx5_cmd_check:756:(pid 5273): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0xa9c090) Use the correct enum value MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK.
Fixes: bb204dcf39fe ("net/mlx5e: Determine source port properly for vlan push action") Signed-off-by: Dmytro Linkin Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 7879e1746297..366bda1bb1c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -183,7 +183,8 @@ static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, u32 port_mask, port_value; if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) - return spec->flow_context.flow_source == MLX5_VPORT_UPLINK; + return spec->flow_context.flow_source == + MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; port_mask = MLX5_GET(fte_match_param, spec->match_criteria, misc_parameters.source_port); -- cgit v1.2.3 From 7afb3e575e5aa9f5a200a3eb3f45d8130f6d6601 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Tue, 5 Nov 2019 23:50:13 +0200 Subject: net: mscc: ocelot: don't handle netdev events for other netdevs The check that the event is actually for this device should be moved from the "port" handler to the net device handler. Otherwise the port handler will deny bonding configuration for other net devices in the same system (like enetc in the LS1028A) that don't have the lag_upper_info->tx_type restriction that ocelot has. Fixes: dc96ee3730fc ("net: mscc: ocelot: add bonding support") Signed-off-by: Claudiu Manoil Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mscc/ocelot.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 344539c0d3aa..dbf09fcf61f1 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1680,9 +1680,6 @@ static int ocelot_netdevice_port_event(struct net_device *dev, struct ocelot_port *ocelot_port = netdev_priv(dev); int err = 0; - if (!ocelot_netdevice_dev_check(dev)) - return 0; - switch (event) { case NETDEV_CHANGEUPPER: if (netif_is_bridge_master(info->upper_dev)) { @@ -1719,6 +1716,9 @@ static int ocelot_netdevice_event(struct notifier_block *unused, struct net_device *dev = netdev_notifier_info_to_dev(ptr); int ret = 0; + if (!ocelot_netdevice_dev_check(dev)) + return 0; + if (event == NETDEV_PRECHANGEUPPER && netif_is_lag_master(info->upper_dev)) { struct netdev_lag_upper_info *lag_upper_info = info->upper_info; -- cgit v1.2.3 From 3b3eed8eec47259939ee6c3d58aea1c311ddee3b Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Tue, 5 Nov 2019 23:50:14 +0200 Subject: net: mscc: ocelot: fix NULL pointer on LAG slave removal lag_upper_info may be NULL on slave removal. Fixes: dc96ee3730fc ("net: mscc: ocelot: add bonding support") Signed-off-by: Claudiu Manoil Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mscc/ocelot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index dbf09fcf61f1..672ea1342add 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1724,7 +1724,8 @@ static int ocelot_netdevice_event(struct notifier_block *unused, struct netdev_lag_upper_info *lag_upper_info = info->upper_info; struct netlink_ext_ack *extack; - if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { + if (lag_upper_info && + lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { extack = netdev_notifier_info_to_extack(&info->info); NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type"); -- cgit v1.2.3 From 17fdd7638cb687cd7f15a48545f25d738f0101e0 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 6 Nov 2019 00:01:40 +0200 Subject: net: mscc: ocelot: fix __ocelot_rmw_ix prototype The "read-modify-write register index" function is declared with a confusing prototype: the "mask" and "reg" arguments are swapped. Fortunately, this does not affect callers so far. Both arguments are u32, and the wrapper macros (ocelot_rmw_ix etc) have the arguments in the correct order (the one from ocelot_io.c). Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mscc/ocelot.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index e40773c01a44..06ac806052bc 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -523,7 +523,7 @@ void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset); #define ocelot_write_rix(ocelot, val, reg, ri) __ocelot_write_ix(ocelot, val, reg, reg##_RSZ * (ri)) #define ocelot_write(ocelot, val, reg) __ocelot_write_ix(ocelot, val, reg, 0) -void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 mask, +void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg, u32 offset); #define ocelot_rmw_ix(ocelot, val, m, reg, gi, ri) __ocelot_rmw_ix(ocelot, val, m, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri)) #define ocelot_rmw_gix(ocelot, val, m, reg, gi) __ocelot_rmw_ix(ocelot, val, m, reg, reg##_GSZ * (gi)) -- cgit v1.2.3 From 105401b659b7eb9cb42d6b5b75d5c049ad4b3dca Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 1 Nov 2019 10:37:54 -0500 Subject: drm/shmem: Add docbook comments for drm_gem_shmem_object madvise fields Add missing docbook comments to madvise fields in struct drm_gem_shmem_object which fixes these warnings: include/drm/drm_gem_shmem_helper.h:87: warning: Function parameter or member 'madv' not described in 'drm_gem_shmem_object' include/drm/drm_gem_shmem_helper.h:87: warning: Function parameter or member 'madv_list' not described in 'drm_gem_shmem_object' Fixes: 17acb9f35ed7 ("drm/shmem: Add madvise state and purge helpers") Reported-by: Sean Paul Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: David Airlie Cc: Daniel Vetter Signed-off-by: Rob Herring Reviewed-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20191101153754.22803-1-robh@kernel.org --- include/drm/drm_gem_shmem_helper.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/drm/drm_gem_shmem_helper.h 
b/include/drm/drm_gem_shmem_helper.h index 01f514521687..7865e6b5d36c 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -44,7 +44,20 @@ struct drm_gem_shmem_object { */ unsigned int pages_use_count; + /** + * @madv: State for madvise + * + * 0 is active/inuse. + * A negative value is the object is purged. + * Positive values are driver specific and not used by the helpers. + */ int madv; + + /** + * @madv_list: List entry for madvise tracking + * + * Typically used by drivers to track purgeable objects + */ struct list_head madv_list; /** -- cgit v1.2.3 From b0814361a25cba73a224548843ed92d8ea78715a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 5 Nov 2019 08:09:51 -0800 Subject: blkcg: make blkcg_print_stat() print stats only for online blkgs blkcg_print_stat() iterates blkgs under RCU and doesn't test whether the blkg is online. This can call into pd_stat_fn() on a pd which is still being initialized leading to an oops. The heaviest operation - recursively summing up rwstat counters - is already done while holding the queue_lock. Expand queue_lock to cover the other operations and skip the blkg if it isn't online yet. The online state is protected by both blkcg and queue locks, so this guarantees that only online blkgs are processed. 
Signed-off-by: Tejun Heo Reported-by: Roman Gushchin Cc: Josef Bacik Fixes: 903d23f0a354 ("blk-cgroup: allow controllers to output their own stats") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 5d21027b1faf..1eb8895be4c6 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -934,9 +934,14 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) int i; bool has_stats = false; + spin_lock_irq(&blkg->q->queue_lock); + + if (!blkg->online) + goto skip; + dname = blkg_dev_name(blkg); if (!dname) - continue; + goto skip; /* * Hooray string manipulation, count is the size written NOT @@ -946,8 +951,6 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) */ off += scnprintf(buf+off, size-off, "%s ", dname); - spin_lock_irq(&blkg->q->queue_lock); - blkg_rwstat_recursive_sum(blkg, NULL, offsetof(struct blkcg_gq, stat_bytes), &rwstat); rbytes = rwstat.cnt[BLKG_RWSTAT_READ]; @@ -960,8 +963,6 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) wios = rwstat.cnt[BLKG_RWSTAT_WRITE]; dios = rwstat.cnt[BLKG_RWSTAT_DISCARD]; - spin_unlock_irq(&blkg->q->queue_lock); - if (rbytes || wbytes || rios || wios) { has_stats = true; off += scnprintf(buf+off, size-off, @@ -999,6 +1000,8 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) seq_commit(sf, -1); } } + skip: + spin_unlock_irq(&blkg->q->queue_lock); } rcu_read_unlock(); -- cgit v1.2.3 From 02b1fa07bb58f5d1f349b5b09eb936739a7b20fc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Nov 2019 14:24:34 -0800 Subject: net/tls: don't pay attention to sk_write_pending when pushing partial records sk_write_pending being not zero does not guarantee that partial record will be pushed. If the thread waiting for memory times out the pending record may get stuck. 
In case of tls_device there is no path where partial record is set and writer present in the first place. Partial record is set only in tls_push_sg() and tls_push_sg() will return an error immediately. All tls_device callers of tls_push_sg() will return (and not wait for memory) if it failed. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tls/tls_device.c | 4 +++- net/tls/tls_sw.c | 9 +++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index f959487c5cd1..5a3715ddc592 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -623,9 +623,11 @@ static int tls_device_push_pending_record(struct sock *sk, int flags) void tls_device_write_space(struct sock *sk, struct tls_context *ctx) { - if (!sk->sk_write_pending && tls_is_partially_sent_record(ctx)) { + if (tls_is_partially_sent_record(ctx)) { gfp_t sk_allocation = sk->sk_allocation; + WARN_ON_ONCE(sk->sk_write_pending); + sk->sk_allocation = GFP_ATOMIC; tls_push_partial_record(sk, ctx, MSG_DONTWAIT | MSG_NOSIGNAL | diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index c2b5e0d2ba1a..e155b792df0b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2180,12 +2180,9 @@ void tls_sw_write_space(struct sock *sk, struct tls_context *ctx) struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx); /* Schedule the transmission if tx list is ready */ - if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) { - /* Schedule the transmission */ - if (!test_and_set_bit(BIT_TX_SCHEDULED, - &tx_ctx->tx_bitmask)) - schedule_delayed_work(&tx_ctx->tx_work.work, 0); - } + if (is_tx_ready(tx_ctx) && + !test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask)) + schedule_delayed_work(&tx_ctx->tx_work.work, 0); } void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx) -- cgit v1.2.3 From
79ffe6087e9145d2377385cac48d0d6a6b4225a5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Nov 2019 14:24:35 -0800 Subject: net/tls: add a TX lock TLS TX needs to release and re-acquire the socket lock if send buffer fills up. TLS SW TX path currently depends on only allowing one thread to enter the function by the abuse of sk_write_pending. If another writer is already waiting for memory no new ones are allowed in. This has two problems: - writers don't wake other threads up when they leave the kernel; meaning that this scheme works for single extra thread (second application thread or delayed work) because memory becoming available will send a wake up request, but as Mallesham and Pooja report with larger number of threads it leads to threads being put to sleep indefinitely; - the delayed work does not get _scheduled_ but it may _run_ when other writers are present leading to crashes as writers don't expect state to change under their feet (same records get pushed and freed multiple times); it's hard to reliably bail from the work, however, because the mere presence of a writer does not guarantee that the writer will push pending records before exiting. Ensuring wakeups always happen will make the code basically open code a mutex. Just use a mutex. The TLS HW TX path does not have any locking (not even the sk_write_pending hack), yet it uses a per-socket sg_tx_data array to push records. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Reported-by: Mallesham Jatharakonda Reported-by: Pooja Trivedi Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- include/net/tls.h | 5 +++++ net/tls/tls_device.c | 6 ++++++ net/tls/tls_main.c | 2 ++ net/tls/tls_sw.c | 21 +++++++-------------- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index c664e6dba0d1..794e297483ea 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -269,6 +270,10 @@ struct tls_context { bool in_tcp_sendpages; bool pending_open_record_frags; + + struct mutex tx_lock; /* protects partially_sent_* fields and + * per-type TX fields + */ unsigned long flags; /* cache cold stuff */ diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 5a3715ddc592..683d00837693 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -523,8 +523,10 @@ last_record: int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { unsigned char record_type = TLS_RECORD_TYPE_DATA; + struct tls_context *tls_ctx = tls_get_ctx(sk); int rc; + mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); if (unlikely(msg->msg_controllen)) { @@ -538,12 +540,14 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) out: release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); return rc; } int tls_device_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { + struct tls_context *tls_ctx = tls_get_ctx(sk); struct iov_iter msg_iter; char *kaddr = kmap(page); struct kvec iov; @@ -552,6 +556,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, if (flags & MSG_SENDPAGE_NOTLAST) flags |= MSG_MORE; + mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); if (flags & MSG_OOB) { @@ -568,6 +573,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, out: release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); return rc; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index ac88877dcade..0775ae40fcfb 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -267,6 +267,7 @@ void 
tls_ctx_free(struct sock *sk, struct tls_context *ctx) memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send)); memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv)); + mutex_destroy(&ctx->tx_lock); if (sk) kfree_rcu(ctx, rcu); @@ -612,6 +613,7 @@ static struct tls_context *create_ctx(struct sock *sk) if (!ctx) return NULL; + mutex_init(&ctx->tx_lock); rcu_assign_pointer(icsk->icsk_ulp_data, ctx); ctx->sk_proto = sk->sk_prot; return ctx; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index e155b792df0b..446f23c1f3ce 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -897,15 +897,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) return -ENOTSUPP; + mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); - /* Wait till there is any pending write on socket */ - if (unlikely(sk->sk_write_pending)) { - ret = wait_on_pending_writer(sk, &timeo); - if (unlikely(ret)) - goto send_end; - } - if (unlikely(msg->msg_controllen)) { ret = tls_proccess_cmsg(sk, msg, &record_type); if (ret) { @@ -1091,6 +1085,7 @@ send_end: ret = sk_stream_error(sk, msg->msg_flags, ret); release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); return copied ? copied : ret; } @@ -1114,13 +1109,6 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page, eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST)); sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - /* Wait till there is any pending write on socket */ - if (unlikely(sk->sk_write_pending)) { - ret = wait_on_pending_writer(sk, &timeo); - if (unlikely(ret)) - goto sendpage_end; - } - /* Call the sk_stream functions to manage the sndbuf mem. 
*/ while (size > 0) { size_t copy, required_size; @@ -1219,15 +1207,18 @@ sendpage_end: int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { + struct tls_context *tls_ctx = tls_get_ctx(sk); int ret; if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY)) return -ENOTSUPP; + mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); ret = tls_sw_do_sendpage(sk, page, offset, size, flags); release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); return ret; } @@ -2170,9 +2161,11 @@ static void tx_work_handler(struct work_struct *work) if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) return; + mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); tls_tx_records(sk, -1); release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); } void tls_sw_write_space(struct sock *sk, struct tls_context *ctx) -- cgit v1.2.3 From 41098af59d8d753aa8d3bb4310cc4ecb61fc82c7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Nov 2019 14:24:36 -0800 Subject: selftests/tls: add test for concurrent recv and send Add a test which spawns 16 threads and performs concurrent send and recv calls on the same socket. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/tls.c | 108 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 4c285b6e1db8..1c8f194d6556 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -898,6 +898,114 @@ TEST_F(tls, nonblocking) } } +static void +test_mutliproc(struct __test_metadata *_metadata, struct _test_data_tls *self, + bool sendpg, unsigned int n_readers, unsigned int n_writers) +{ + const unsigned int n_children = n_readers + n_writers; + const size_t data = 6 * 1000 * 1000; + const size_t file_sz = data / 100; + size_t read_bias, write_bias; + int i, fd, child_id; + char buf[file_sz]; + pid_t pid; + + /* Only allow multiples for simplicity */ + ASSERT_EQ(!(n_readers % n_writers) || !(n_writers % n_readers), true); + read_bias = n_writers / n_readers ?: 1; + write_bias = n_readers / n_writers ?: 1; + + /* prep a file to send */ + fd = open("/tmp/", O_TMPFILE | O_RDWR, 0600); + ASSERT_GE(fd, 0); + + memset(buf, 0xac, file_sz); + ASSERT_EQ(write(fd, buf, file_sz), file_sz); + + /* spawn children */ + for (child_id = 0; child_id < n_children; child_id++) { + pid = fork(); + ASSERT_NE(pid, -1); + if (!pid) + break; + } + + /* parent waits for all children */ + if (pid) { + for (i = 0; i < n_children; i++) { + int status; + + wait(&status); + EXPECT_EQ(status, 0); + } + + return; + } + + /* Split threads for reading and writing */ + if (child_id < n_readers) { + size_t left = data * read_bias; + char rb[8001]; + + while (left) { + int res; + + res = recv(self->cfd, rb, + left > sizeof(rb) ? sizeof(rb) : left, 0); + + EXPECT_GE(res, 0); + left -= res; + } + } else { + size_t left = data * write_bias; + + while (left) { + int res; + + ASSERT_EQ(lseek(fd, 0, SEEK_SET), 0); + if (sendpg) + res = sendfile(self->fd, fd, NULL, + left > file_sz ? file_sz : left); + else + res = send(self->fd, buf, + left > file_sz ? 
file_sz : left, 0); + + EXPECT_GE(res, 0); + left -= res; + } + } +} + +TEST_F(tls, mutliproc_even) +{ + test_mutliproc(_metadata, self, false, 6, 6); +} + +TEST_F(tls, mutliproc_readers) +{ + test_mutliproc(_metadata, self, false, 4, 12); +} + +TEST_F(tls, mutliproc_writers) +{ + test_mutliproc(_metadata, self, false, 10, 2); +} + +TEST_F(tls, mutliproc_sendpage_even) +{ + test_mutliproc(_metadata, self, true, 6, 6); +} + +TEST_F(tls, mutliproc_sendpage_readers) +{ + test_mutliproc(_metadata, self, true, 4, 12); +} + +TEST_F(tls, mutliproc_sendpage_writers) +{ + test_mutliproc(_metadata, self, true, 10, 2); +} + TEST_F(tls, control_msg) { if (self->notls) -- cgit v1.2.3 From 98f3375505b8d6517bd6710bc6d4f6289eeb30aa Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 6 Nov 2019 10:49:57 +0100 Subject: net/smc: fix ethernet interface refcounting If a pnet table entry is to be added mentioning a valid ethernet interface, but an invalid infiniband or ISM device, the dev_put() operation for the ethernet interface is called twice, resulting in a negative refcount for the ethernet interface, which disables removal of such a network interface. This patch removes one of the dev_put() calls. Fixes: 890a2cb4a966 ("net/smc: rework pnet table") Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_pnet.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 2920b006f65c..571e6d84da3b 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -376,8 +376,6 @@ static int smc_pnet_fill_entry(struct net *net, return 0; error: - if (pnetelem->ndev) - dev_put(pnetelem->ndev); return rc; } -- cgit v1.2.3 From 38264de0dce80d223f358ce47512378fae0de586 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Tue, 5 Nov 2019 18:16:38 +0800 Subject: drm/amd/swSMU: fix smu workload bit map error fix workload bit (WORKLOAD_PPLIB_COMPUTE_BIT) map error on vega20 and navi asic. 
fix commit: drm/amd/powerplay: add function get_workload_type_map for swsmu Signed-off-by: Kevin Wang Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 2 +- drivers/gpu/drm/amd/powerplay/vega20_ppt.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 0b461404af6b..3ec5a10a7c4d 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -205,7 +205,7 @@ static struct smu_11_0_cmn2aisc_mapping navi10_workload_map[PP_SMC_POWER_PROFILE WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING, WORKLOAD_PPLIB_POWER_SAVING_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO, WORKLOAD_PPLIB_VIDEO_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR, WORKLOAD_PPLIB_VR_BIT), - WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_CUSTOM_BIT), + WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_COMPUTE_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM, WORKLOAD_PPLIB_CUSTOM_BIT), }; diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index bbd8ebd58434..92c393f613d3 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -219,7 +219,7 @@ static struct smu_11_0_cmn2aisc_mapping vega20_workload_map[PP_SMC_POWER_PROFILE WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING, WORKLOAD_PPLIB_POWER_SAVING_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO, WORKLOAD_PPLIB_VIDEO_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR, WORKLOAD_PPLIB_VR_BIT), - WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_CUSTOM_BIT), + WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_COMPUTE_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM, WORKLOAD_PPLIB_CUSTOM_BIT), }; -- cgit v1.2.3 From 6a299d7aaa97dfde5988d8f9e2fa2c046b5793ff Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 5 Nov 2019 18:13:49 +0800 Subject: 
drm/amdgpu: register gpu instance before fan boost feature enablement Otherwise, the feature enablement will be skipped due to wrong count. Fixes: beff74bc6e0fa91 ("drm/amdgpu: fix a race in GPU reset with IB test (v2)") Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5a1939dbd4e3..7a6c837c0a85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2885,6 +2885,13 @@ fence_driver_init: DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); } + /* + * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. + * Otherwise the mgpu fan boost feature will be skipped due to the + * gpu instance is counted less. + */ + amdgpu_register_gpu_instance(adev); + /* enable clockgating, etc. after ib tests, etc. since some blocks require * explicit gating rather than handling it automatically. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index d55f5baa83d3..a042ef471fbd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -190,7 +190,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) pm_runtime_put_autosuspend(dev->dev); } - amdgpu_register_gpu_instance(adev); out: if (r) { /* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */ -- cgit v1.2.3 From 589b64a7e39720b1784a1a26569c57cb977936ef Mon Sep 17 00:00:00 2001 From: changzhu Date: Thu, 10 Oct 2019 11:02:33 +0800 Subject: drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GRBM register interface is now capable of bursting 1 cycle per register wr->wr, wr->rd much faster than previous multicycle per transaction done interface. This has caused a problem where status registers requiring HW to update have a 1 cycle delay, due to the register update having to go through GRBM. For cp ucode, it has realized dummy read in cp firmware. It covers the use of WAIT_REG_MEM operation 1 case only. So it needs to call gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning to update firmware in case firmware is too old to have function to realize dummy read in cp firmware. For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma is moved to gfxhub in gfx10. So it needs to add dummy read in driver between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0. 
Signed-off-by: changzhu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 48 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 8 +++--- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 13 ++++++++- 4 files changed, 64 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 6ee4021910e2..6d19183b478b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -289,6 +289,7 @@ struct amdgpu_gfx { uint32_t mec2_feature_version; bool mec_fw_write_wait; bool me_fw_write_wait; + bool cp_fw_write_wait; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; unsigned num_gfx_rings; struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 8dfc775626a7..53090eae0082 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -564,6 +564,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev) kfree(adev->gfx.rlc.register_list_format); } +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) +{ + adev->gfx.cp_fw_write_wait = false; + + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_NAVI14: + if ((adev->gfx.me_fw_version >= 0x00000046) && + (adev->gfx.me_feature_version >= 27) && + (adev->gfx.pfp_fw_version >= 0x00000068) && + (adev->gfx.pfp_feature_version >= 27) && + (adev->gfx.mec_fw_version >= 0x0000005b) && + (adev->gfx.mec_feature_version >= 27)) + adev->gfx.cp_fw_write_wait = true; + break; + default: + break; + } + + if (adev->gfx.cp_fw_write_wait == false) + DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ + GRBM requires 1-cycle delay in cp firmware\n"); +} + + static void gfx_v10_0_init_rlc_ext_microcode(struct 
amdgpu_device *adev) { const struct rlc_firmware_header_v2_1 *rlc_hdr; @@ -832,6 +858,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) } } + gfx_v10_0_check_fw_write_wait(adev); out: if (err) { dev_err(adev->dev, @@ -4765,6 +4792,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); } +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + struct amdgpu_device *adev = ring->adev; + bool fw_version_ok = false; + + fw_version_ok = adev->gfx.cp_fw_write_wait; + + if (fw_version_ok) + gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, + ref, mask, 0x20); + else + amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, + ref, mask); +} + static void gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -5155,6 +5200,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_tmz = gfx_v10_0_ring_emit_tmz, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { @@ -5188,6 +5234,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { @@ -5218,6 +5265,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { .emit_rreg = gfx_v10_0_ring_emit_rreg, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = 
gfx_v10_0_ring_emit_reg_write_reg_wait, }; static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 354e6200ca9a..5c7d5f73f54f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), upper_32_bits(pd_addr)); - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); - - /* wait for the invalidate to complete */ - amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, - 1 << vmid, 1 << vmid); + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, + hub->vm_inv_eng0_ack + eng, + req, 1 << vmid); return pd_addr; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index f6e81680dd7e..8493bfbbc148 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1173,6 +1173,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); } +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + amdgpu_ring_emit_wreg(ring, reg0, ref); + /* wait for a cycle to reset vm_inv_eng*_ack */ + amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0); + amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); +} + static int sdma_v5_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1588,7 +1598,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { 6 + /* sdma_v5_0_ring_emit_pipeline_sync */ /* sdma_v5_0_ring_emit_vm_flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 + 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 
for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */ .emit_ib = sdma_v5_0_ring_emit_ib, @@ -1602,6 +1612,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { .pad_ib = sdma_v5_0_ring_pad_ib, .emit_wreg = sdma_v5_0_ring_emit_wreg, .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v5_0_ring_init_cond_exec, .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec, .preempt_ib = sdma_v5_0_ring_preempt_ib, -- cgit v1.2.3 From 440a7a54e7ec012ec8b27c27e460dfd6f9a24ddb Mon Sep 17 00:00:00 2001 From: changzhu Date: Tue, 5 Nov 2019 18:29:12 +0800 Subject: drm/amdgpu: add warning for GRBM 1-cycle delay issue in gfx9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It needs to add warning to update firmware in gfx9 in case that firmware is too old to have function to realize dummy read in cp firmware. Signed-off-by: changzhu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index dcadc73bffd2..2b04cac4b308 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -973,6 +973,13 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) adev->gfx.me_fw_write_wait = false; adev->gfx.mec_fw_write_wait = false; + if ((adev->gfx.mec_fw_version < 0x000001a5) || + (adev->gfx.mec_feature_version < 46) || + (adev->gfx.pfp_fw_version < 0x000000b7) || + (adev->gfx.pfp_feature_version < 46)) + DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ + GRBM requires 1-cycle delay in cp firmware\n"); + switch (adev->asic_type) { case CHIP_VEGA10: if ((adev->gfx.me_fw_version >= 0x0000009c) && -- cgit v1.2.3 From 77a31602216e0368307dd2e79427dd1c9ca74a8e Mon Sep 17 00:00:00 2001 From: 
Alex Deucher Date: Tue, 29 Oct 2019 10:36:22 -0400 Subject: drm/amdgpu/renoir: move gfxoff handling into gfx9 module To properly handle the option parsing ordering. Reviewed-by: Yong Zhao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/soc15.c | 5 ----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 2b04cac4b308..dfca83a2de47 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1046,6 +1046,12 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) !adev->gfx.rlc.is_rlc_v2_1)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + if (adev->pm.pp_feature & PP_GFXOFF_MASK) + adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_RLC_SMU_HS; + break; + case CHIP_RENOIR: if (adev->pm.pp_feature & PP_GFXOFF_MASK) adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_CP | diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index f8ab80c8801b..4ccfcdf8f16a 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1186,11 +1186,6 @@ static int soc15_common_early_init(void *handle) AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG; adev->external_rev_id = adev->rev_id + 0x91; - - if (adev->pm.pp_feature & PP_GFXOFF_MASK) - adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | - AMD_PG_SUPPORT_CP | - AMD_PG_SUPPORT_RLC_SMU_HS; break; default: /* FIXME: not supported yet */ -- cgit v1.2.3 From 2c409ba81be25516afe05ae27a4a15da01740b01 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Oct 2019 10:21:28 -0400 Subject: drm/radeon: fix si_enable_smc_cac() failed issue Need to set the dte flag on this asic. 
Port the fix from amdgpu: 5cb818b861be114 ("drm/amd/amdgpu: fix si_enable_smc_cac() failed issue") Reviewed-by: Yong Zhao Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/si_dpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 460fd98e40a7..a0b382a637a6 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -1958,6 +1958,7 @@ static void si_initialize_powertune_defaults(struct radeon_device *rdev) case 0x682C: si_pi->cac_weights = cac_weights_cape_verde_pro; si_pi->dte_data = dte_data_sun_xt; + update_dte_from_pl2 = true; break; case 0x6825: case 0x6827: -- cgit v1.2.3 From d243af7ab9feb49f11f2c0050d2077e2d9556f9b Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 6 Nov 2019 13:58:15 -0800 Subject: SMB3: Fix persistent handles reconnect When the client hits a network reconnect, it re-opens every open file with a create context to reconnect a persistent handle. All create context types should be 8-bytes aligned but the padding was missed for that one. As a result, some servers don't allow us to reconnect handles and return an error. The problem occurs when the problematic context is not at the end of the create request packet. Fix this by adding a proper padding at the end of the reconnect persistent handle context. 
Cc: Stable # 4.19.x Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index ea735d59c36e..0abfde6d0b05 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -838,6 +838,7 @@ struct create_durable_handle_reconnect_v2 { struct create_context ccontext; __u8 Name[8]; struct durable_reconnect_context_v2 dcontext; + __u8 Pad[4]; } __packed; /* See MS-SMB2 2.2.13.2.5 */ -- cgit v1.2.3 From 4d7c47e34fab0d25790bb6e85b85e26fdf0090d5 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:02:55 +0100 Subject: net: stmmac: gmac4: bitrev32 returns u32 The bitrev32 function returns an u32 var, not an int. Fix it. Fixes: 477286b53f55 ("stmmac: add GMAC4 core support") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 5a7b0aca1d31..66e60c7e9850 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -432,7 +432,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw, * bits used depends on the hardware configuration * selected at core configuration time. */ - int bit_nr = bitrev32(~crc32_le(~0, ha->addr, + u32 bit_nr = bitrev32(~crc32_le(~0, ha->addr, ETH_ALEN)) >> (32 - mcbitslog2); /* The most significant bit determines the register to * use (H/L) while the other 5 bits determine the bit -- cgit v1.2.3 From 3d00e45d498fd5347cea653ef494c56731b651e0 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:02:56 +0100 Subject: net: stmmac: xgmac: bitrev32 returns u32 The bitrev32 function returns an u32 var, not an int. Fix it. Fixes: 0efedbf11f07 ("net: stmmac: xgmac: Fix XGMAC selftests") Signed-off-by: Jose Abreu Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 5031398e612c..01075a955c66 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -463,7 +463,7 @@ static void dwxgmac2_set_filter(struct mac_device_info *hw, value |= XGMAC_FILTER_HMC; netdev_for_each_mc_addr(ha, dev) { - int nr = (bitrev32(~crc32_le(~0, ha->addr, 6)) >> + u32 nr = (bitrev32(~crc32_le(~0, ha->addr, 6)) >> (32 - mcbitslog2)); mc_filter[nr >> 5] |= (1 << (nr & 0x1F)); } -- cgit v1.2.3 From eeb9d745169847ecde4aa311eb618beb984629b0 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:02:57 +0100 Subject: net: stmmac: selftests: Prevent false positives in filter tests In L2 tests that filter packets by destination MAC address we need to prevent false positives that can occur if we add an address that collides with the existing ones. To fix this, let's manually check if the new address to be added is already present in the NIC and use a different one if so. For Hash filtering this also involves converting the address to the hash. Fixes: 091810dbded9 ("net: stmmac: Introduce selftests support") Signed-off-by: Jose Abreu Signed-off-by: David S. 
Miller --- .../net/ethernet/stmicro/stmmac/stmmac_selftests.c | 134 +++++++++++++++------ 1 file changed, 94 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index e4ac3c401432..ac3f658105c0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -6,7 +6,9 @@ * Author: Jose Abreu */ +#include #include +#include #include #include #include @@ -485,12 +487,48 @@ static int stmmac_filter_check(struct stmmac_priv *priv) return -EOPNOTSUPP; } +static bool stmmac_hash_check(struct stmmac_priv *priv, unsigned char *addr) +{ + int mc_offset = 32 - priv->hw->mcast_bits_log2; + struct netdev_hw_addr *ha; + u32 hash, hash_nr; + + /* First compute the hash for desired addr */ + hash = bitrev32(~crc32_le(~0, addr, 6)) >> mc_offset; + hash_nr = hash >> 5; + hash = 1 << (hash & 0x1f); + + /* Now, check if it collides with any existing one */ + netdev_for_each_mc_addr(ha, priv->dev) { + u32 nr = bitrev32(~crc32_le(~0, ha->addr, ETH_ALEN)) >> mc_offset; + if (((nr >> 5) == hash_nr) && ((1 << (nr & 0x1f)) == hash)) + return false; + } + + /* No collisions, address is good to go */ + return true; +} + +static bool stmmac_perfect_check(struct stmmac_priv *priv, unsigned char *addr) +{ + struct netdev_hw_addr *ha; + + /* Check if it collides with any existing one */ + netdev_for_each_uc_addr(ha, priv->dev) { + if (!memcmp(ha->addr, addr, ETH_ALEN)) + return false; + } + + /* No collisions, address is good to go */ + return true; +} + static int stmmac_test_hfilt(struct stmmac_priv *priv) { - unsigned char gd_addr[ETH_ALEN] = {0x01, 0xee, 0xdd, 0xcc, 0xbb, 0xaa}; - unsigned char bd_addr[ETH_ALEN] = {0x01, 0x01, 0x02, 0x03, 0x04, 0x05}; + unsigned char gd_addr[ETH_ALEN] = {0xf1, 0xee, 0xdd, 0xcc, 0xbb, 0xaa}; + unsigned char bd_addr[ETH_ALEN] = {0xf1, 0xff, 0xff, 0xff, 0xff, 0xff}; struct stmmac_packet_attrs 
attr = { }; - int ret; + int ret, tries = 256; ret = stmmac_filter_check(priv); if (ret) @@ -499,6 +537,16 @@ static int stmmac_test_hfilt(struct stmmac_priv *priv) if (netdev_mc_count(priv->dev) >= priv->hw->multicast_filter_bins) return -EOPNOTSUPP; + while (--tries) { + /* We only need to check the bd_addr for collisions */ + bd_addr[ETH_ALEN - 1] = tries; + if (stmmac_hash_check(priv, bd_addr)) + break; + } + + if (!tries) + return -EOPNOTSUPP; + ret = dev_mc_add(priv->dev, gd_addr); if (ret) return ret; @@ -523,13 +571,25 @@ cleanup: static int stmmac_test_pfilt(struct stmmac_priv *priv) { - unsigned char gd_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77}; - unsigned char bd_addr[ETH_ALEN] = {0x08, 0x00, 0x22, 0x33, 0x44, 0x55}; + unsigned char gd_addr[ETH_ALEN] = {0xf0, 0x01, 0x44, 0x55, 0x66, 0x77}; + unsigned char bd_addr[ETH_ALEN] = {0xf0, 0xff, 0xff, 0xff, 0xff, 0xff}; struct stmmac_packet_attrs attr = { }; - int ret; + int ret, tries = 256; if (stmmac_filter_check(priv)) return -EOPNOTSUPP; + if (netdev_uc_count(priv->dev) >= priv->hw->unicast_filter_entries) + return -EOPNOTSUPP; + + while (--tries) { + /* We only need to check the bd_addr for collisions */ + bd_addr[ETH_ALEN - 1] = tries; + if (stmmac_perfect_check(priv, bd_addr)) + break; + } + + if (!tries) + return -EOPNOTSUPP; ret = dev_uc_add(priv->dev, gd_addr); if (ret) @@ -553,39 +613,31 @@ cleanup: return ret; } -static int stmmac_dummy_sync(struct net_device *netdev, const u8 *addr) -{ - return 0; -} - -static void stmmac_test_set_rx_mode(struct net_device *netdev) -{ - /* As we are in test mode of ethtool we already own the rtnl lock - * so no address will change from user. 
We can just call the - * ndo_set_rx_mode() callback directly */ - if (netdev->netdev_ops->ndo_set_rx_mode) - netdev->netdev_ops->ndo_set_rx_mode(netdev); -} - static int stmmac_test_mcfilt(struct stmmac_priv *priv) { - unsigned char uc_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77}; - unsigned char mc_addr[ETH_ALEN] = {0x01, 0x01, 0x44, 0x55, 0x66, 0x77}; + unsigned char uc_addr[ETH_ALEN] = {0xf0, 0xff, 0xff, 0xff, 0xff, 0xff}; + unsigned char mc_addr[ETH_ALEN] = {0xf1, 0xff, 0xff, 0xff, 0xff, 0xff}; struct stmmac_packet_attrs attr = { }; - int ret; + int ret, tries = 256; if (stmmac_filter_check(priv)) return -EOPNOTSUPP; - if (!priv->hw->multicast_filter_bins) + if (netdev_uc_count(priv->dev) >= priv->hw->unicast_filter_entries) return -EOPNOTSUPP; - /* Remove all MC addresses */ - __dev_mc_unsync(priv->dev, NULL); - stmmac_test_set_rx_mode(priv->dev); + while (--tries) { + /* We only need to check the mc_addr for collisions */ + mc_addr[ETH_ALEN - 1] = tries; + if (stmmac_hash_check(priv, mc_addr)) + break; + } + + if (!tries) + return -EOPNOTSUPP; ret = dev_uc_add(priv->dev, uc_addr); if (ret) - goto cleanup; + return ret; attr.dst = uc_addr; @@ -602,30 +654,34 @@ static int stmmac_test_mcfilt(struct stmmac_priv *priv) cleanup: dev_uc_del(priv->dev, uc_addr); - __dev_mc_sync(priv->dev, stmmac_dummy_sync, NULL); - stmmac_test_set_rx_mode(priv->dev); return ret; } static int stmmac_test_ucfilt(struct stmmac_priv *priv) { - unsigned char uc_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77}; - unsigned char mc_addr[ETH_ALEN] = {0x01, 0x01, 0x44, 0x55, 0x66, 0x77}; + unsigned char uc_addr[ETH_ALEN] = {0xf0, 0xff, 0xff, 0xff, 0xff, 0xff}; + unsigned char mc_addr[ETH_ALEN] = {0xf1, 0xff, 0xff, 0xff, 0xff, 0xff}; struct stmmac_packet_attrs attr = { }; - int ret; + int ret, tries = 256; if (stmmac_filter_check(priv)) return -EOPNOTSUPP; - if (!priv->hw->multicast_filter_bins) + if (netdev_mc_count(priv->dev) >= priv->hw->multicast_filter_bins) return 
-EOPNOTSUPP; - /* Remove all UC addresses */ - __dev_uc_unsync(priv->dev, NULL); - stmmac_test_set_rx_mode(priv->dev); + while (--tries) { + /* We only need to check the uc_addr for collisions */ + uc_addr[ETH_ALEN - 1] = tries; + if (stmmac_perfect_check(priv, uc_addr)) + break; + } + + if (!tries) + return -EOPNOTSUPP; ret = dev_mc_add(priv->dev, mc_addr); if (ret) - goto cleanup; + return ret; attr.dst = mc_addr; @@ -642,8 +698,6 @@ static int stmmac_test_ucfilt(struct stmmac_priv *priv) cleanup: dev_mc_del(priv->dev, mc_addr); - __dev_uc_sync(priv->dev, stmmac_dummy_sync, NULL); - stmmac_test_set_rx_mode(priv->dev); return ret; } -- cgit v1.2.3 From 96147375d49f9b523e6462992ddcfb753835c42f Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:02:58 +0100 Subject: net: stmmac: xgmac: Only get SPH header len if available Split Header length is only available when L34T == 0. Fix this by correctly checking if L34T is zero before trying to get Header length. Fixes: 67afd6d1cfdf ("net: stmmac: Add Split Header support and enable it in XGMAC cores") Signed-off-by: Jose Abreu Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index ae48154f933c..bd5838ce1e8a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -288,7 +288,8 @@ static int dwxgmac2_get_rx_hash(struct dma_desc *p, u32 *hash, static int dwxgmac2_get_rx_header_len(struct dma_desc *p, unsigned int *len) { - *len = le32_to_cpu(p->des2) & XGMAC_RDES2_HL; + if (le32_to_cpu(p->des3) & XGMAC_RDES3_L34T) + *len = le32_to_cpu(p->des2) & XGMAC_RDES2_HL; return 0; } -- cgit v1.2.3 From 97add93fbcfa566735d6a4b96684110d356ebd35 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:02:59 +0100 Subject: net: stmmac: xgmac: Fix TSA selection When we change between Transmission Scheduling Algorithms, we need to clear previous values so that the new chosen algorithm is correctly selected. Fixes: ec6ea8e3eee9 ("net: stmmac: Add CBS support in XGMAC2") Signed-off-by: Jose Abreu Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 01075a955c66..070bd7d1ae4c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -224,6 +224,7 @@ static void dwxgmac2_config_cbs(struct mac_device_info *hw, writel(low_credit, ioaddr + XGMAC_MTL_TCx_LOCREDIT(queue)); value = readl(ioaddr + XGMAC_MTL_TCx_ETS_CONTROL(queue)); + value &= ~XGMAC_TSA; value |= XGMAC_CC | XGMAC_CBS; writel(value, ioaddr + XGMAC_MTL_TCx_ETS_CONTROL(queue)); } -- cgit v1.2.3 From 08c1ac3bcba8cd52449f55a065e60779a0fa2c97 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:03:00 +0100 Subject: net: stmmac: xgmac: Fix AV Feature detection Fix incorrect precedence of operators. For reference: AV implies AV Feature but RAV implies only RX side AV Feature. As we want full AV features we need to check RAV. Fixes: c2b69474d63b ("net: stmmac: xgmac: Correct RAVSEL field interpretation") Signed-off-by: Jose Abreu Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 965cbe3e6f51..2e814aa64a5c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -369,7 +369,7 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, dma_cap->eee = (hw_cap & XGMAC_HWFEAT_EEESEL) >> 13; dma_cap->atime_stamp = (hw_cap & XGMAC_HWFEAT_TSSEL) >> 12; dma_cap->av = (hw_cap & XGMAC_HWFEAT_AVSEL) >> 11; - dma_cap->av &= !(hw_cap & XGMAC_HWFEAT_RAVSEL) >> 10; + dma_cap->av &= !((hw_cap & XGMAC_HWFEAT_RAVSEL) >> 10); dma_cap->arpoffsel = (hw_cap & XGMAC_HWFEAT_ARPOFFSEL) >> 9; dma_cap->rmon = (hw_cap & XGMAC_HWFEAT_MMCSEL) >> 8; dma_cap->pmt_magic_frame = (hw_cap & XGMAC_HWFEAT_MGKSEL) >> 7; -- cgit v1.2.3 From 132f2f20c9866325d12c155aca06d260f358d3cb Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:03:01 +0100 Subject: net: stmmac: xgmac: Disable Flow Control when 1 or more queues are in AV When in AVB mode we need to disable flow control to prevent MAC from pausing in TX side. Fixes: ec6ea8e3eee9 ("net: stmmac: Add CBS support in XGMAC2") Signed-off-by: Jose Abreu Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 2e814aa64a5c..f70ca5300b82 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -470,6 +470,7 @@ static void dwxgmac2_enable_tso(void __iomem *ioaddr, bool en, u32 chan) static void dwxgmac2_qmode(void __iomem *ioaddr, u32 channel, u8 qmode) { u32 value = readl(ioaddr + XGMAC_MTL_TXQ_OPMODE(channel)); + u32 flow = readl(ioaddr + XGMAC_RX_FLOW_CTRL); value &= ~XGMAC_TXQEN; if (qmode != MTL_QUEUE_AVB) { @@ -477,6 +478,7 @@ static void dwxgmac2_qmode(void __iomem *ioaddr, u32 channel, u8 qmode) writel(0, ioaddr + XGMAC_MTL_TCx_ETS_CONTROL(channel)); } else { value |= 0x1 << XGMAC_TXQEN_SHIFT; + writel(flow & (~XGMAC_RFE), ioaddr + XGMAC_RX_FLOW_CTRL); } writel(value, ioaddr + XGMAC_MTL_TXQ_OPMODE(channel)); -- cgit v1.2.3 From aeb18dd0769289ae9141221862319a36094fb06b Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:03:02 +0100 Subject: net: stmmac: xgmac: Disable MMC interrupts by default MMC interrupts were being enabled, which is not what we want because it will lead to a storm of interrupts that are not handled at all. Fix it by disabling all MMC interrupts for XGMAC. Fixes: b6cdf09f51c2 ("net: stmmac: xgmac: Implement MMC counters") Signed-off-by: Jose Abreu Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/mmc_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c index a223584f5f9a..252cf48c5816 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c @@ -176,6 +176,7 @@ #define MMC_XGMAC_RX_PKT_SMD_ERR 0x22c #define MMC_XGMAC_RX_PKT_ASSEMBLY_OK 0x230 #define MMC_XGMAC_RX_FPE_FRAG 0x234 +#define MMC_XGMAC_RX_IPC_INTR_MASK 0x25c static void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode) { @@ -333,8 +334,9 @@ static void dwxgmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode) static void dwxgmac_mmc_intr_all_mask(void __iomem *mmcaddr) { - writel(MMC_DEFAULT_MASK, mmcaddr + MMC_RX_INTR_MASK); - writel(MMC_DEFAULT_MASK, mmcaddr + MMC_TX_INTR_MASK); + writel(0x0, mmcaddr + MMC_RX_INTR_MASK); + writel(0x0, mmcaddr + MMC_TX_INTR_MASK); + writel(MMC_DEFAULT_MASK, mmcaddr + MMC_XGMAC_RX_IPC_INTR_MASK); } static void dwxgmac_read_mmc_reg(void __iomem *addr, u32 reg, u32 *dest) -- cgit v1.2.3 From cda4985a3ebb95f14fe3a9dc905820417baf79a5 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:03:03 +0100 Subject: net: stmmac: Fix the packet count in stmmac_rx() Currently, stmmac_rx() is counting the number of descriptors but it should count the number of packets as specified by the NAPI limit. Fix this. Fixes: ec222003bd94 ("net: stmmac: Prepare to add Split Header support") Signed-off-by: Jose Abreu Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 4e9c848c67cc..1ab1eea1556a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3506,8 +3506,6 @@ read_again: if (unlikely(status & dma_own)) break; - count++; - rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx, DMA_RX_SIZE); next_entry = rx_q->cur_rx; @@ -3534,6 +3532,7 @@ read_again: goto read_again; if (unlikely(error)) { dev_kfree_skb(skb); + count++; continue; } @@ -3573,6 +3572,7 @@ read_again: skb = napi_alloc_skb(&ch->rx_napi, len); if (!skb) { priv->dev->stats.rx_dropped++; + count++; continue; } @@ -3638,6 +3638,7 @@ read_again: priv->dev->stats.rx_packets++; priv->dev->stats.rx_bytes += len; + count++; } if (status & rx_not_ls) { -- cgit v1.2.3 From b2f071995b7a4f175467d683d4d18eab10810020 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:03:04 +0100 Subject: net: stmmac: Fix TSO descriptor with Enhanced Addressing When using addressing > 32 bits the TSO first descriptor only has the header so we can't set the payload field for this descriptor. Let's reset the variable so that buffer 2 value is zero. Fixes: a993db88d17d ("net: stmmac: Enable support for > 32 Bits addressing in XGMAC") Signed-off-by: Jose Abreu Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1ab1eea1556a..b0a16d7c6e3d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2996,6 +2996,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_set_desc_addr(priv, first, des); tmp_pay_len = pay_len; des += proto_hdr_len; + pay_len = 0; } stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue); -- cgit v1.2.3 From 7df4a3a76d34fe1f550e82c6ef368c7c40f0aaa4 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:03:05 +0100 Subject: net: stmmac: Fix the TX IOC in xmit path IOC bit must be only set in the last descriptor. Move the logic up a little bit to make sure it's set in the correct descriptor. Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 64 ++++++++++++----------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b0a16d7c6e3d..f826365c979d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3024,6 +3024,19 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) /* Only the last descriptor gets to point to the skb. 
*/ tx_q->tx_skbuff[tx_q->cur_tx] = skb; + /* Manage tx mitigation */ + tx_q->tx_count_frames += nfrags + 1; + if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) && + !((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en)) { + stmmac_tx_timer_arm(priv, queue); + } else { + desc = &tx_q->dma_tx[tx_q->cur_tx]; + tx_q->tx_count_frames = 0; + stmmac_set_tx_ic(priv, desc); + priv->xstats.tx_set_ic_bit++; + } + /* We've used all descriptors we need for this skb, however, * advance cur_tx so that it references a fresh descriptor. * ndo_start_xmit will fill this descriptor the next time it's @@ -3041,19 +3054,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) priv->xstats.tx_tso_frames++; priv->xstats.tx_tso_nfrags += nfrags; - /* Manage tx mitigation */ - tx_q->tx_count_frames += nfrags + 1; - if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) && - !(priv->synopsys_id >= DWMAC_CORE_4_00 && - (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - priv->hwts_tx_en)) { - stmmac_tx_timer_arm(priv, queue); - } else { - tx_q->tx_count_frames = 0; - stmmac_set_tx_ic(priv, desc); - priv->xstats.tx_set_ic_bit++; - } - if (priv->sarc_type) stmmac_set_desc_sarc(priv, first, priv->sarc_type); @@ -3225,6 +3225,27 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* Only the last descriptor gets to point to the skb. */ tx_q->tx_skbuff[entry] = skb; + /* According to the coalesce parameter the IC bit for the latest + * segment is reset and the timer re-started to clean the tx status. + * This approach takes care about the fragments: desc is the first + * element in case of no SG. 
+ */ + tx_q->tx_count_frames += nfrags + 1; + if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) && + !((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en)) { + stmmac_tx_timer_arm(priv, queue); + } else { + if (likely(priv->extend_desc)) + desc = &tx_q->dma_etx[entry].basic; + else + desc = &tx_q->dma_tx[entry]; + + tx_q->tx_count_frames = 0; + stmmac_set_tx_ic(priv, desc); + priv->xstats.tx_set_ic_bit++; + } + /* We've used all descriptors we need for this skb, however, * advance cur_tx so that it references a fresh descriptor. * ndo_start_xmit will fill this descriptor the next time it's @@ -3260,23 +3281,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_bytes += skb->len; - /* According to the coalesce parameter the IC bit for the latest - * segment is reset and the timer re-started to clean the tx status. - * This approach takes care about the fragments: desc is the first - * element in case of no SG. - */ - tx_q->tx_count_frames += nfrags + 1; - if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) && - !(priv->synopsys_id >= DWMAC_CORE_4_00 && - (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - priv->hwts_tx_en)) { - stmmac_tx_timer_arm(priv, queue); - } else { - tx_q->tx_count_frames = 0; - stmmac_set_tx_ic(priv, desc); - priv->xstats.tx_set_ic_bit++; - } - if (priv->sarc_type) stmmac_set_desc_sarc(priv, first, priv->sarc_type); -- cgit v1.2.3 From 9c6850fea3edefef6e7153b2c466f09155399882 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 6 Nov 2019 21:51:31 +0100 Subject: r8169: fix page read in r8168g_mdio_read Functions like phy_modify_paged() read the current page, on Realtek PHY's this means reading the value of register 0x1f. Add special handling for reading this register, similar to what we do already in r8168g_mdio_write(). Currently we read a random value that by chance seems to be 0 always. 
Fixes: a2928d28643e ("r8169: use paged versions of phylib MDIO access functions") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 5064c292b873..c4e961ea44d5 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -916,6 +916,9 @@ static void r8168g_mdio_write(struct rtl8169_private *tp, int reg, int value) static int r8168g_mdio_read(struct rtl8169_private *tp, int reg) { + if (reg == 0x1f) + return tp->ocp_base == OCP_STD_PHY_BASE ? 0 : tp->ocp_base >> 4; + if (tp->ocp_base != OCP_STD_PHY_BASE) reg -= 0x10; -- cgit v1.2.3 From 648db0514a3808eead513d7abfaf650d02a64d5c Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 7 Nov 2019 09:30:19 +0800 Subject: net: hns3: add compatible handling for command HCLGE_OPC_PF_RST_DONE Since old firmware does not support HCLGE_OPC_PF_RST_DONE, it will return -EOPNOTSUPP to the driver when received this command. So for this case, it should just print a warning and return success to the caller. Fixes: 72e2fb07997c ("net: hns3: clear reset interrupt status in hclge_irq_handle()") Signed-off-by: Huazhong Tan Signed-off-by: David S. 
Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e02e01bd9eff..16f7d0e15b4f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3587,12 +3587,28 @@ static int hclge_set_rst_done(struct hclge_dev *hdev) { struct hclge_pf_rst_done_cmd *req; struct hclge_desc desc; + int ret; req = (struct hclge_pf_rst_done_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PF_RST_DONE, false); req->pf_rst_done |= HCLGE_PF_RESET_DONE_BIT; - return hclge_cmd_send(&hdev->hw, &desc, 1); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + /* To be compatible with the old firmware, which does not support + * command HCLGE_OPC_PF_RST_DONE, just print a warning and + * return success + */ + if (ret == -EOPNOTSUPP) { + dev_warn(&hdev->pdev->dev, + "current firmware does not support command(0x%x)!\n", + HCLGE_OPC_PF_RST_DONE); + return 0; + } else if (ret) { + dev_err(&hdev->pdev->dev, "assert PF reset done fail %d!\n", + ret); + } + + return ret; } static int hclge_reset_prepare_up(struct hclge_dev *hdev) -- cgit v1.2.3 From 99a8efbb6e30b72ac98cecf81103f847abffb1e5 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 7 Nov 2019 09:33:20 +0800 Subject: NFC: st21nfca: fix double free The variable nfcid_skb is not changed in the callee nfc_hci_get_param() if error occurs. Consequently, the freed variable nfcid_skb will be freed again, resulting in a double free bug. Set nfcid_skb to NULL after releasing it to fix the bug. Signed-off-by: Pan Bian Signed-off-by: David S. 
Miller --- drivers/nfc/st21nfca/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nfc/st21nfca/core.c b/drivers/nfc/st21nfca/core.c index f9ac176cf257..2ce17932a073 100644 --- a/drivers/nfc/st21nfca/core.c +++ b/drivers/nfc/st21nfca/core.c @@ -708,6 +708,7 @@ static int st21nfca_hci_complete_target_discovered(struct nfc_hci_dev *hdev, NFC_PROTO_FELICA_MASK; } else { kfree_skb(nfcid_skb); + nfcid_skb = NULL; /* P2P in type A */ r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_F_GATE, ST21NFCA_RF_READER_F_NFCID1, -- cgit v1.2.3 From 63e006c107ff4235d2a8fd52704f283d23642537 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 4 Nov 2019 11:09:08 +0100 Subject: pinctrl: stmfx: fix valid_mask init sequence With stmfx_pinctrl_gpio_init_valid_mask callback, gpio_valid_mask was used to initialize gpiochip valid_mask for gpiolib. But gpio_valid_mask was not yet initialized. gpio_valid_mask required gpio-ranges to be registered, this is the case after gpiochip_add_data call. But init_valid_mask callback is also called under gpiochip_add_data. gpio_valid_mask initialization cannot be moved before gpiochip_add_data because gpio-ranges are not registered. So, it is not possible to use init_valid_mask callback. To avoid this issue, get rid of valid_mask and rely on ranges. 
Fixes: da9b142ab2c5 ("pinctrl: stmfx: Use the callback to populate valid_mask") Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20191104100908.10880-1-amelie.delaunay@st.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-stmfx.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/pinctrl/pinctrl-stmfx.c b/drivers/pinctrl/pinctrl-stmfx.c index 564660028fcc..ccdf0bb21414 100644 --- a/drivers/pinctrl/pinctrl-stmfx.c +++ b/drivers/pinctrl/pinctrl-stmfx.c @@ -585,19 +585,6 @@ static int stmfx_pinctrl_gpio_function_enable(struct stmfx_pinctrl *pctl) return stmfx_function_enable(pctl->stmfx, func); } -static int stmfx_pinctrl_gpio_init_valid_mask(struct gpio_chip *gc, - unsigned long *valid_mask, - unsigned int ngpios) -{ - struct stmfx_pinctrl *pctl = gpiochip_get_data(gc); - u32 n; - - for_each_clear_bit(n, &pctl->gpio_valid_mask, ngpios) - clear_bit(n, valid_mask); - - return 0; -} - static int stmfx_pinctrl_probe(struct platform_device *pdev) { struct stmfx *stmfx = dev_get_drvdata(pdev->dev.parent); @@ -660,7 +647,6 @@ static int stmfx_pinctrl_probe(struct platform_device *pdev) pctl->gpio_chip.ngpio = pctl->pctl_desc.npins; pctl->gpio_chip.can_sleep = true; pctl->gpio_chip.of_node = np; - pctl->gpio_chip.init_valid_mask = stmfx_pinctrl_gpio_init_valid_mask; ret = devm_gpiochip_add_data(pctl->dev, &pctl->gpio_chip, pctl); if (ret) { -- cgit v1.2.3 From 32e9f6cb07348427abb2806abc67e743a2993b68 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 7 Nov 2019 01:59:23 +0000 Subject: staging: vboxsf: Remove unused including Remove including that don't need it. 
Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20191107015923.100013-1-yuehaibing@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vboxsf/vfsmod.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/vboxsf/vfsmod.h b/drivers/staging/vboxsf/vfsmod.h index de650d65fbe4..18f95b00fc33 100644 --- a/drivers/staging/vboxsf/vfsmod.h +++ b/drivers/staging/vboxsf/vfsmod.h @@ -10,7 +10,6 @@ #include #include -#include #include "shfl_hostintf.h" #define DIR_BUFFER_SIZE SZ_16K -- cgit v1.2.3 From 94fed404109d301312bbdf73d7007976ffca3482 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 5 Nov 2019 17:51:08 +0000 Subject: staging: vboxsf: fix dereference of pointer dentry before it is null checked Currently the pointer dentry is being dereferenced before it is being null checked. Fix this by only dereferencing dentry once we know it is not null. Addresses-Coverity: ("Dereference before null check") Fixes: df4028658f9d ("staging: Add VirtualBox guest shared folder (vboxsf) support") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20191105175108.79824-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vboxsf/utils.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/vboxsf/utils.c b/drivers/staging/vboxsf/utils.c index 1870b69c824e..34a49e6f74fc 100644 --- a/drivers/staging/vboxsf/utils.c +++ b/drivers/staging/vboxsf/utils.c @@ -174,7 +174,7 @@ int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info) int vboxsf_inode_revalidate(struct dentry *dentry) { - struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb); + struct vboxsf_sbi *sbi; struct vboxsf_inode *sf_i; struct shfl_fsobjinfo info; struct timespec64 prev_mtime; @@ -187,6 +187,7 @@ int vboxsf_inode_revalidate(struct dentry *dentry) inode = d_inode(dentry); prev_mtime = inode->i_mtime; sf_i = VBOXSF_I(inode); + sbi = VBOXSF_SBI(dentry->d_sb); if (!sf_i->force_restat) { if 
(time_before(jiffies, dentry->d_time + sbi->o.ttl)) return 0; -- cgit v1.2.3 From e39fcaef7ed993950af74a584f8246022b551971 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 6 Nov 2019 11:59:54 +0000 Subject: staging: Fix error return code in vboxsf_fill_super() Fix to return negative error code -ENOMEM from the error handling case instead of 0, as done elsewhere in this function. Fixes: df4028658f9d ("staging: Add VirtualBox guest shared folder (vboxsf) support") Signed-off-by: Wei Yongjun Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20191106115954.114678-1-weiyongjun1@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vboxsf/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/vboxsf/super.c b/drivers/staging/vboxsf/super.c index 3913ffafa83b..0bf4d724aefd 100644 --- a/drivers/staging/vboxsf/super.c +++ b/drivers/staging/vboxsf/super.c @@ -176,8 +176,10 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) /* Turn source into a shfl_string and map the folder */ size = strlen(fc->source) + 1; folder_name = kmalloc(SHFLSTRING_HEADER_SIZE + size, GFP_KERNEL); - if (!folder_name) + if (!folder_name) { + err = -ENOMEM; goto fail_free; + } folder_name->size = size; folder_name->length = size - 1; strlcpy(folder_name->string.utf8, fc->source, size); -- cgit v1.2.3 From ff29fde84d1fc82f233c7da0daa3574a3942bec7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 7 Nov 2019 09:39:32 -0500 Subject: ceph: return -EINVAL if given fsc mount option on kernel w/o support If someone requests fscache on the mount, and the kernel doesn't support it, it should fail the mount. [ Drop ceph prefix -- it's provided by pr_err. 
] Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/super.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index edfd643a8205..b47f43fc2d68 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -268,6 +268,7 @@ static int parse_fsopt_token(char *c, void *private) } break; case Opt_fscache_uniq: +#ifdef CONFIG_CEPH_FSCACHE kfree(fsopt->fscache_uniq); fsopt->fscache_uniq = kstrndup(argstr[0].from, argstr[0].to-argstr[0].from, @@ -276,7 +277,10 @@ static int parse_fsopt_token(char *c, void *private) return -ENOMEM; fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; break; - /* misc */ +#else + pr_err("fscache support is disabled\n"); + return -EINVAL; +#endif case Opt_wsize: if (intval < (int)PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE) return -EINVAL; @@ -353,10 +357,15 @@ static int parse_fsopt_token(char *c, void *private) fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; break; case Opt_fscache: +#ifdef CONFIG_CEPH_FSCACHE fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; kfree(fsopt->fscache_uniq); fsopt->fscache_uniq = NULL; break; +#else + pr_err("fscache support is disabled\n"); + return -EINVAL; +#endif case Opt_nofscache: fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; kfree(fsopt->fscache_uniq); -- cgit v1.2.3 From 025ec40b81d785a98f76b8bdb509ac10773b4f12 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 7 Nov 2019 14:29:50 +0800 Subject: nfc: netlink: fix double device reference drop The function nfc_put_device(dev) is called twice to drop the reference to dev when there is no associated local llcp. Remove one of them to fix the bug. Fixes: 52feb444a903 ("NFC: Extend netlink interface for LTO, RW, and MIUX parameters support") Fixes: d9b8d8e19b07 ("NFC: llcp: Service Name Lookup netlink interface") Signed-off-by: Pan Bian Reviewed-by: Johan Hovold Signed-off-by: David S. 
Miller --- net/nfc/netlink.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 17e6ca62f1be..afde0d763039 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1099,7 +1099,6 @@ static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info) local = nfc_llcp_find_local(dev); if (!local) { - nfc_put_device(dev); rc = -ENODEV; goto exit; } @@ -1159,7 +1158,6 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) local = nfc_llcp_find_local(dev); if (!local) { - nfc_put_device(dev); rc = -ENODEV; goto exit; } -- cgit v1.2.3 From 332f989a3b0041b810836c5c3747e59aad7e9d0b Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 7 Nov 2019 09:48:01 +0100 Subject: CDC-NCM: handle incomplete transfer of MTU A malicious device may give half an answer when asked for its MTU. The driver will proceed after this with a garbage MTU. Anything but a complete answer must be treated as an error. V2: used sizeof as request by Alexander Reported-and-tested-by: syzbot+0631d878823ce2411636@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Signed-off-by: David S. 
Miller --- drivers/net/usb/cdc_ncm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 00cab3f43a4c..a245597a3902 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -578,8 +578,8 @@ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size) /* read current mtu value from device */ err = usbnet_read_cmd(dev, USB_CDC_GET_MAX_DATAGRAM_SIZE, USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE, - 0, iface_no, &max_datagram_size, 2); - if (err < 0) { + 0, iface_no, &max_datagram_size, sizeof(max_datagram_size)); + if (err < sizeof(max_datagram_size)) { dev_dbg(&dev->intf->dev, "GET_MAX_DATAGRAM_SIZE failed\n"); goto out; } @@ -590,7 +590,7 @@ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size) max_datagram_size = cpu_to_le16(ctx->max_datagram_size); err = usbnet_write_cmd(dev, USB_CDC_SET_MAX_DATAGRAM_SIZE, USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE, - 0, iface_no, &max_datagram_size, 2); + 0, iface_no, &max_datagram_size, sizeof(max_datagram_size)); if (err < 0) dev_dbg(&dev->intf->dev, "SET_MAX_DATAGRAM_SIZE failed\n"); -- cgit v1.2.3 From e497df686e8fed8c1dd69179010656362858edb3 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Thu, 7 Nov 2019 11:57:01 +0100 Subject: net: usb: qmi_wwan: add support for DW5821e with eSIM support Exactly same layout as the default DW5821e module, just a different vid/pid. The QMI interface is exposed in USB configuration #1: P: Vendor=413c ProdID=81e0 Rev=03.18 S: Manufacturer=Dell Inc. S: Product=DW5821e-eSIM Snapdragon X20 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#=0x1 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=00 Prot=00 Driver=usbhid I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) 
Sub=00 Prot=00 Driver=option I: If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option Signed-off-by: Aleksander Morgado Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 596428ec71df..56d334b9ad45 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1362,6 +1362,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */ + {QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ -- cgit v1.2.3 From bf5a6b4c474c589244dc25ee1af2c3c829228ef8 Mon Sep 17 00:00:00 2001 From: Salil Mehta Date: Thu, 7 Nov 2019 17:09:53 +0000 Subject: net: hns: Fix the stray netpoll locks causing deadlock in NAPI path This patch fixes the problem of the spin locks, originally meant for the netpoll path of hns driver, causing deadlock in the normal NAPI poll path. The issue happened due to the presence of the stray leftover spin lock code related to the netpoll, whose support was earlier removed from the HNS[1], got activated due to enabling of NET_POLL_CONTROLLER switch. Earlier background: The netpoll handling code originally had this bug(as identified by Marc Zyngier[2]) of wrong spin lock API being used which did not disable the interrupts and hence could cause locking issues. i.e. 
if the lock were first acquired in context to thread like 'ip' util and this lock if ever got later acquired again in context to the interrupt context like TX/RX (Interrupts could always pre-empt the lock holding task and acquire the lock again) and hence could cause deadlock. Proposed Solution: 1. If the netpoll was enabled in the HNS driver, which is not right now, we could have simply used spin_[un]lock_irqsave() 2. But as netpoll is disabled, therefore, it is best to get rid of the existing locks and stray code for now. This should solve the problem reported by Marc. [1] https://git.kernel.org/torvalds/c/4bd2c03be7 [2] https://patchwork.ozlabs.org/patch/1189139/ Fixes: 4bd2c03be707 ("net: hns: remove ndo_poll_controller") Cc: lipeng Cc: Yisen Zhuang Cc: Eric Dumazet Cc: David S. Miller Reported-by: Marc Zyngier Acked-by: Marc Zyngier Tested-by: Marc Zyngier Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hnae.c | 1 - drivers/net/ethernet/hisilicon/hns/hnae.h | 3 --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 22 +--------------------- 3 files changed, 1 insertion(+), 25 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index 6d0457eb4faa..08339278c722 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c +++ b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -199,7 +199,6 @@ hnae_init_ring(struct hnae_queue *q, struct hnae_ring *ring, int flags) ring->q = q; ring->flags = flags; - spin_lock_init(&ring->lock); ring->coal_param = q->handle->coal_param; assert(!ring->desc && !ring->desc_cb && !ring->desc_dma_addr); diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index e9c67c06bfd2..6ab9458302e1 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -274,9 +274,6 @@ struct hnae_ring { /* statistic */ struct ring_stats stats; - /* ring lock for poll one */ 
- spinlock_t lock; - dma_addr_t desc_dma_addr; u32 buf_size; /* size for hnae_desc->addr, preset by AE */ u16 desc_num; /* total number of desc */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index a48396dd4ebb..14ab20491fd0 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -943,15 +943,6 @@ static int is_valid_clean_head(struct hnae_ring *ring, int h) return u > c ? (h > c && h <= u) : (h > c || h <= u); } -/* netif_tx_lock will turn down the performance, set only when necessary */ -#ifdef CONFIG_NET_POLL_CONTROLLER -#define NETIF_TX_LOCK(ring) spin_lock(&(ring)->lock) -#define NETIF_TX_UNLOCK(ring) spin_unlock(&(ring)->lock) -#else -#define NETIF_TX_LOCK(ring) -#define NETIF_TX_UNLOCK(ring) -#endif - /* reclaim all desc in one budget * return error or number of desc left */ @@ -965,21 +956,16 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data, int head; int bytes, pkts; - NETIF_TX_LOCK(ring); - head = readl_relaxed(ring->io_base + RCB_REG_HEAD); rmb(); /* make sure head is ready before touch any data */ - if (is_ring_empty(ring) || head == ring->next_to_clean) { - NETIF_TX_UNLOCK(ring); + if (is_ring_empty(ring) || head == ring->next_to_clean) return 0; /* no data to poll */ - } if (!is_valid_clean_head(ring, head)) { netdev_err(ndev, "wrong head (%d, %d-%d)\n", head, ring->next_to_use, ring->next_to_clean); ring->stats.io_err_cnt++; - NETIF_TX_UNLOCK(ring); return -EIO; } @@ -994,8 +980,6 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data, ring->stats.tx_pkts += pkts; ring->stats.tx_bytes += bytes; - NETIF_TX_UNLOCK(ring); - dev_queue = netdev_get_tx_queue(ndev, ring_data->queue_index); netdev_tx_completed_queue(dev_queue, pkts, bytes); @@ -1055,16 +1039,12 @@ static void hns_nic_tx_clr_all_bufs(struct hns_nic_ring_data *ring_data) int head; int bytes, pkts; - NETIF_TX_LOCK(ring); - head = 
ring->next_to_use; /* ntu :soft setted ring position*/ bytes = 0; pkts = 0; while (head != ring->next_to_clean) hns_nic_reclaim_one_desc(ring, &bytes, &pkts); - NETIF_TX_UNLOCK(ring); - dev_queue = netdev_get_tx_queue(ndev, ring_data->queue_index); netdev_tx_reset_queue(dev_queue); } -- cgit v1.2.3 From 1bef4c223b8588cf50433bdc2c6953d82949b3b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 09:26:19 -0800 Subject: ipv6: fixes rt6_probe() and fib6_nh->last_probe init While looking at a syzbot KCSAN report [1], I found multiple issues in this code : 1) fib6_nh->last_probe has an initial value of 0. While probably okay on 64bit kernels, this causes an issue on 32bit kernels since the time_after(jiffies, 0 + interval) might be false ~24 days after boot (for HZ=1000) 2) The data-race found by KCSAN I could use READ_ONCE() and WRITE_ONCE(), but we also can take the opportunity of not piling-up too many rt6_probe_deferred() works by using instead cmpxchg() so that only one cpu wins the race. 
[1] BUG: KCSAN: data-race in find_match / find_match write to 0xffff8880bb7aabe8 of 8 bytes by interrupt on cpu 1: rt6_probe net/ipv6/route.c:663 [inline] find_match net/ipv6/route.c:757 [inline] find_match+0x5bd/0x790 net/ipv6/route.c:733 __find_rr_leaf+0xe3/0x780 net/ipv6/route.c:831 find_rr_leaf net/ipv6/route.c:852 [inline] rt6_select net/ipv6/route.c:896 [inline] fib6_table_lookup+0x383/0x650 net/ipv6/route.c:2164 ip6_pol_route+0xee/0x5c0 net/ipv6/route.c:2200 ip6_pol_route_output+0x48/0x60 net/ipv6/route.c:2452 fib6_rule_lookup+0x3d6/0x470 net/ipv6/fib6_rules.c:117 ip6_route_output_flags_noref+0x16b/0x230 net/ipv6/route.c:2484 ip6_route_output_flags+0x50/0x1a0 net/ipv6/route.c:2497 ip6_dst_lookup_tail+0x25d/0xc30 net/ipv6/ip6_output.c:1049 ip6_dst_lookup_flow+0x68/0x120 net/ipv6/ip6_output.c:1150 inet6_csk_route_socket+0x2f7/0x420 net/ipv6/inet6_connection_sock.c:106 inet6_csk_xmit+0x91/0x1f0 net/ipv6/inet6_connection_sock.c:121 __tcp_transmit_skb+0xe81/0x1d60 net/ipv4/tcp_output.c:1169 tcp_transmit_skb net/ipv4/tcp_output.c:1185 [inline] tcp_xmit_probe_skb+0x19b/0x1d0 net/ipv4/tcp_output.c:3735 read to 0xffff8880bb7aabe8 of 8 bytes by interrupt on cpu 0: rt6_probe net/ipv6/route.c:657 [inline] find_match net/ipv6/route.c:757 [inline] find_match+0x521/0x790 net/ipv6/route.c:733 __find_rr_leaf+0xe3/0x780 net/ipv6/route.c:831 find_rr_leaf net/ipv6/route.c:852 [inline] rt6_select net/ipv6/route.c:896 [inline] fib6_table_lookup+0x383/0x650 net/ipv6/route.c:2164 ip6_pol_route+0xee/0x5c0 net/ipv6/route.c:2200 ip6_pol_route_output+0x48/0x60 net/ipv6/route.c:2452 fib6_rule_lookup+0x3d6/0x470 net/ipv6/fib6_rules.c:117 ip6_route_output_flags_noref+0x16b/0x230 net/ipv6/route.c:2484 ip6_route_output_flags+0x50/0x1a0 net/ipv6/route.c:2497 ip6_dst_lookup_tail+0x25d/0xc30 net/ipv6/ip6_output.c:1049 ip6_dst_lookup_flow+0x68/0x120 net/ipv6/ip6_output.c:1150 inet6_csk_route_socket+0x2f7/0x420 net/ipv6/inet6_connection_sock.c:106 inet6_csk_xmit+0x91/0x1f0 
net/ipv6/inet6_connection_sock.c:121 __tcp_transmit_skb+0xe81/0x1d60 net/ipv4/tcp_output.c:1169 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 18894 Comm: udevd Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: cc3a86c802f0 ("ipv6: Change rt6_probe to take a fib6_nh") Fixes: f547fac624be ("ipv6: rate-limit probes for neighbourless routes") Signed-off-by: Eric Dumazet Reported-by: syzbot Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a63ff85fe141..e60bf8e7dd1a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -621,6 +621,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh) { struct __rt6_probe_work *work = NULL; const struct in6_addr *nh_gw; + unsigned long last_probe; struct neighbour *neigh; struct net_device *dev; struct inet6_dev *idev; @@ -639,6 +640,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh) nh_gw = &fib6_nh->fib_nh_gw6; dev = fib6_nh->fib_nh_dev; rcu_read_lock_bh(); + last_probe = READ_ONCE(fib6_nh->last_probe); idev = __in6_dev_get(dev); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { @@ -654,13 +656,15 @@ static void rt6_probe(struct fib6_nh *fib6_nh) __neigh_set_probe_once(neigh); } write_unlock(&neigh->lock); - } else if (time_after(jiffies, fib6_nh->last_probe + + } else if (time_after(jiffies, last_probe + idev->cnf.rtr_probe_interval)) { work = kmalloc(sizeof(*work), GFP_ATOMIC); } - if (work) { - fib6_nh->last_probe = jiffies; + if (!work || cmpxchg(&fib6_nh->last_probe, + last_probe, jiffies) != last_probe) { + kfree(work); + } else { INIT_WORK(&work->work, rt6_probe_deferred); work->target = *nh_gw; dev_hold(dev); @@ -3383,6 +3387,9 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, int err; fib6_nh->fib_nh_family = AF_INET6; +#ifdef CONFIG_IPV6_ROUTER_PREF + fib6_nh->last_probe = 
jiffies; +#endif err = -ENODEV; if (cfg->fc_ifindex) { -- cgit v1.2.3 From e0a312629fefa943534fc46f7bfbe6de3fdaf463 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 7 Nov 2019 18:29:52 +0000 Subject: ipv4: Fix table id reference in fib_sync_down_addr Hendrik reported routes in the main table using source address are not removed when the address is removed. The problem is that fib_sync_down_addr does not account for devices in the default VRF which are associated with the main table. Fix by updating the table id reference. Fixes: 5a56a0b3a45d ("net: Don't delete routes in different VRFs") Reported-by: Hendrik Donner Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 0913a090b2bf..f1888c683426 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1814,8 +1814,8 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local) int ret = 0; unsigned int hash = fib_laddr_hashfn(local); struct hlist_head *head = &fib_info_laddrhash[hash]; + int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; struct net *net = dev_net(dev); - int tb_id = l3mdev_fib_table(dev); struct fib_info *fi; if (!fib_info_laddrhash || local == 0) -- cgit v1.2.3 From 6dd47d9754ff0589715054b11294771f2c9a16ac Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Nov 2019 15:41:11 +0100 Subject: mac80211: fix ieee80211_txq_setup_flows() failure path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If ieee80211_txq_setup_flows() fails, we don't clean up LED state properly, leading to crashes later on, fix that. 
Fixes: dc8b274f0952 ("mac80211: Move up init of TXQs") Signed-off-by: Johannes Berg Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20191105154110.1ccf7112ba5d.I0ba865792446d051867b33153be65ce6b063d98c@changeid Signed-off-by: Johannes Berg --- net/mac80211/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index aba094b4ccfc..2d05c4cfaf6d 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1292,8 +1292,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_remove_interfaces(local); fail_rate: rtnl_unlock(); - ieee80211_led_exit(local); fail_flows: + ieee80211_led_exit(local); destroy_workqueue(local->workqueue); fail_workqueue: wiphy_unregister(local->hw.wiphy); -- cgit v1.2.3 From 71e67c3bd127cfe7863f54e4b087eba1cc8f9a7a Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 5 Nov 2019 16:57:50 +0100 Subject: net/fq_impl: Switch to kvmalloc() for memory allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The FQ implementation used by mac80211 allocates memory using kmalloc(), which can fail; and Johannes reported that this actually happens in practice. To avoid this, switch the allocation to kvmalloc() instead; this also brings fq_impl in line with all the FQ qdiscs. 
Fixes: 557fc4a09803 ("fq: add fair queuing framework") Reported-by: Johannes Berg Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20191105155750.547379-1-toke@redhat.com Signed-off-by: Johannes Berg --- include/net/fq_impl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index 107c0d700ed6..38a9a3d1222b 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -313,7 +313,7 @@ static int fq_init(struct fq *fq, int flows_cnt) fq->limit = 8192; fq->memory_limit = 16 << 20; /* 16 MBytes */ - fq->flows = kcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL); + fq->flows = kvcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL); if (!fq->flows) return -ENOMEM; @@ -331,7 +331,7 @@ static void fq_reset(struct fq *fq, for (i = 0; i < fq->flows_cnt; i++) fq_flow_reset(fq, &fq->flows[i], free_func); - kfree(fq->flows); + kvfree(fq->flows); fq->flows = NULL; } -- cgit v1.2.3 From 285531f9e6774e3be71da6673d475ff1a088d675 Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Thu, 31 Oct 2019 06:12:43 -0600 Subject: mac80211: fix station inactive_time shortly after boot In the first 5 minutes after boot (time of INITIAL_JIFFIES), ieee80211_sta_last_active() returns zero if last_ack is zero. This leads to "inactive time" showing jiffies_to_msecs(jiffies). # iw wlan0 station get fc:ec:da:64:a6:dd Station fc:ec:da:64:a6:dd (on wlan0) inactive time: 4294894049 ms . . connected time: 70 seconds Fix by returning last_rx if last_ack == 0. 
Signed-off-by: Ahmed Zaki Link: https://lore.kernel.org/r/20191031121243.27694-1-anzaki@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index bd11fef2139f..8d3a2389b055 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2457,7 +2457,8 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); - if (time_after(stats->last_rx, sta->status_stats.last_ack)) + if (!sta->status_stats.last_ack || + time_after(stats->last_rx, sta->status_stats.last_ack)) return stats->last_rx; return sta->status_stats.last_ack; } -- cgit v1.2.3 From c31432fa7f825de0e19838f1ac7746381c509ec4 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 31 Oct 2019 12:26:20 -0700 Subject: cpufreq: intel_pstate: Fix invalid EPB setting The max value of EPB can only be 0x0F. Attempting to set more than that triggers an "unchecked MSR access error" warning which happens in intel_pstate_hwp_force_min_perf() called via cpufreq stop_cpu(). However, it is not even necessary to touch the EPB from intel_pstate, because it is restored on every CPU online by the intel_epb.c code, so let that code do the right thing and drop the redundant (and incorrect) EPB update from intel_pstate. Fixes: af3b7379e2d70 ("cpufreq: intel_pstate: Force HWP min perf before offline") Reported-by: Qian Cai Cc: 5.2+ # 5.2+ Signed-off-by: Srinivas Pandruvada [ rjw: Changelog ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 53a51c169451..8ab31702cf6a 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -847,11 +847,9 @@ static void intel_pstate_hwp_force_min_perf(int cpu) value |= HWP_MAX_PERF(min_perf); value |= HWP_MIN_PERF(min_perf); - /* Set EPP/EPB to min */ + /* Set EPP to min */ if (boot_cpu_has(X86_FEATURE_HWP_EPP)) value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE); - else - intel_pstate_set_epb(cpu, HWP_EPP_BALANCE_POWERSAVE); wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); } -- cgit v1.2.3 From 8e9c523016cf9983b295e4bc659183d1fa6ef8e0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Nov 2019 10:48:47 +0300 Subject: block: drbd: remove a stray unlock in __drbd_send_protocol() There are two callers of this function and they both unlock the mutex so this ends up being a double unlock. 
Fixes: 44ed167da748 ("drbd: rcu_read_lock() and rcu_dereference() for tconn->net_conf") Signed-off-by: Dan Carpenter Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5b248763a672..a18155cdce41 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -786,7 +786,6 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm if (nc->tentative && connection->agreed_pro_version < 92) { rcu_read_unlock(); - mutex_unlock(&sock->mutex); drbd_err(connection, "--dry-run is not supported by peer"); return -EOPNOTSUPP; } -- cgit v1.2.3 From 24906a41eecb73d51974ade0847c21e429beec60 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 17 Oct 2019 21:22:18 +0200 Subject: pwm: bcm-iproc: Prevent unloading the driver module while in use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The owner member of struct pwm_ops must be set to THIS_MODULE to increase the reference count of the module such that the module cannot be removed while its code is in use. 
Fixes: daa5abc41c80 ("pwm: Add support for Broadcom iProc PWM controller") Signed-off-by: Uwe Kleine-König Reviewed-by: Florian Fainelli Signed-off-by: Thierry Reding --- drivers/pwm/pwm-bcm-iproc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-bcm-iproc.c b/drivers/pwm/pwm-bcm-iproc.c index 56c38cfae92c..1f829edd8ee7 100644 --- a/drivers/pwm/pwm-bcm-iproc.c +++ b/drivers/pwm/pwm-bcm-iproc.c @@ -187,6 +187,7 @@ static int iproc_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops iproc_pwm_ops = { .apply = iproc_pwmc_apply, .get_state = iproc_pwmc_get_state, + .owner = THIS_MODULE, }; static int iproc_pwmc_probe(struct platform_device *pdev) -- cgit v1.2.3 From e4dd5608033efe7b6030cde359bfdbaeb73bc22d Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 8 Nov 2019 10:00:44 +0000 Subject: net: ethernet: octeon_mgmt: Account for second possible VLAN header Octeon's input ring-buffer entry has 14 bits-wide size field, so to account for second possible VLAN header max_mtu must be further reduced. Fixes: 109cc16526c6d ("ethernet/cavium: use core min/max MTU checking") Signed-off-by: Alexander Sverdlin Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 0e5de88fd6e8..cdd7e5da4a74 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -1499,7 +1499,7 @@ static int octeon_mgmt_probe(struct platform_device *pdev) netdev->ethtool_ops = &octeon_mgmt_ethtool_ops; netdev->min_mtu = 64 - OCTEON_MGMT_RX_HEADROOM; - netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM; + netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM - VLAN_HLEN; mac = of_get_mac_address(pdev->dev.of_node); -- cgit v1.2.3 From ad8a7220355d39cddce8eac1cea9677333e8b821 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 8 Nov 2019 17:08:50 +0100 Subject: vsock/virtio: fix sock refcnt holding during the shutdown The "42f5cda5eaf4" commit rightly set SOCK_DONE on peer shutdown, but there is an issue if we receive the SHUTDOWN(RDWR) while the virtio_transport_close_timeout() is scheduled. In this case, when the timeout fires, the SOCK_DONE is already set and the virtio_transport_close_timeout() will not call virtio_transport_reset() and virtio_transport_do_close(). This causes that both sockets remain open and will never be released, preventing the unloading of [virtio|vhost]_transport modules. This patch fixes this issue, calling virtio_transport_reset() and virtio_transport_do_close() when we receive the SHUTDOWN(RDWR) and there is nothing left to read. Fixes: 42f5cda5eaf4 ("vsock/virtio: set SOCK_DONE on peer shutdown") Cc: Stephen Barber Signed-off-by: Stefano Garzarella Signed-off-by: David S. 
Miller --- net/vmw_vsock/virtio_transport_common.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 481f7f8a1655..fb2060dffb0a 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -947,9 +947,11 @@ virtio_transport_recv_connected(struct sock *sk, if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) vsk->peer_shutdown |= SEND_SHUTDOWN; if (vsk->peer_shutdown == SHUTDOWN_MASK && - vsock_stream_has_data(vsk) <= 0) { - sock_set_flag(sk, SOCK_DONE); - sk->sk_state = TCP_CLOSING; + vsock_stream_has_data(vsk) <= 0 && + !sock_flag(sk, SOCK_DONE)) { + (void)virtio_transport_reset(vsk, NULL); + + virtio_transport_do_close(vsk, true); } if (le32_to_cpu(pkt->hdr.flags)) sk->sk_state_change(sk); -- cgit v1.2.3 From 65de03e251382306a4575b1779c57c87889eee49 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 8 Nov 2019 12:18:29 -0800 Subject: cgroup,writeback: don't switch wbs immediately on dead wbs if the memcg is dead cgroup writeback tries to refresh the associated wb immediately if the current wb is dead. This is to avoid keeping issuing IOs on the stale wb after memcg - blkcg association has changed (ie. when blkcg got disabled / enabled higher up in the hierarchy). Unfortunately, the logic gets triggered spuriously on inodes which are associated with dead cgroups. When the logic is triggered on dead cgroups, the attempt fails only after doing quite a bit of work allocating and initializing a new wb. While c3aab9a0bd91 ("mm/filemap.c: don't initiate writeback if mapping has no dirty pages") alleviated the issue significantly as it now only triggers when the inode has dirty pages. However, the condition can still be triggered before the inode is switched to a different cgroup and the logic simply doesn't make sense. Skip the immediate switching if the associated memcg is dying. 
This is a simplified version of the following two patches: * https://lore.kernel.org/linux-mm/20190513183053.GA73423@dennisz-mbp/ * http://lkml.kernel.org/r/156355839560.2063.5265687291430814589.stgit@buzz Cc: Konstantin Khlebnikov Fixes: e8a7abf5a5bd ("writeback: disassociate inodes from dying bdi_writebacks") Acked-by: Dennis Zhou Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8461a6322039..335607b8c5c0 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -576,10 +576,13 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc, spin_unlock(&inode->i_lock); /* - * A dying wb indicates that the memcg-blkcg mapping has changed - * and a new wb is already serving the memcg. Switch immediately. + * A dying wb indicates that either the blkcg associated with the + * memcg changed or the associated memcg is dying. In the first + * case, a replacement wb should already be available and we should + * refresh the wb immediately. In the second case, trying to + * refresh will keep failing. 
*/ - if (unlikely(wb_dying(wbc->wb))) + if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css))) inode_switch_wbs(inode, wbc->wb_id); } EXPORT_SYMBOL_GPL(wbc_attach_and_unlock_inode); -- cgit v1.2.3 From e3b8b6a0d12cccf772113d6b5c1875192186fbd4 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 5 Nov 2019 11:22:12 +0000 Subject: sched/core: Fix compilation error when cgroup not selected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When cgroup is disabled the following compilation error was hit kernel/sched/core.c: In function ‘uclamp_update_active_tasks’: kernel/sched/core.c:1081:23: error: storage size of ‘it’ isn’t known struct css_task_iter it; ^~ kernel/sched/core.c:1084:2: error: implicit declaration of function ‘css_task_iter_start’; did you mean ‘__sg_page_iter_start’? [-Werror=implicit-function-declaration] css_task_iter_start(css, 0, &it); ^~~~~~~~~~~~~~~~~~~ __sg_page_iter_start kernel/sched/core.c:1085:14: error: implicit declaration of function ‘css_task_iter_next’; did you mean ‘__sg_page_iter_next’? [-Werror=implicit-function-declaration] while ((p = css_task_iter_next(&it))) { ^~~~~~~~~~~~~~~~~~ __sg_page_iter_next kernel/sched/core.c:1091:2: error: implicit declaration of function ‘css_task_iter_end’; did you mean ‘get_task_cred’? 
[-Werror=implicit-function-declaration] css_task_iter_end(&it); ^~~~~~~~~~~~~~~~~ get_task_cred kernel/sched/core.c:1081:23: warning: unused variable ‘it’ [-Wunused-variable] struct css_task_iter it; ^~ cc1: some warnings being treated as errors make[2]: *** [kernel/sched/core.o] Error 1 Fix by protecting uclamp_update_active_tasks() with CONFIG_UCLAMP_TASK_GROUP Fixes: babbe170e053 ("sched/uclamp: Update CPU's refcount on TG's clamp changes") Reported-by: Randy Dunlap Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Tested-by: Randy Dunlap Cc: Steven Rostedt Cc: Ingo Molnar Cc: Vincent Guittot Cc: Patrick Bellasi Cc: Mel Gorman Cc: Dietmar Eggemann Cc: Juri Lelli Cc: Ben Segall Link: https://lkml.kernel.org/r/20191105112212.596-1-qais.yousef@arm.com --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index dd05a378631a..afd4d8028771 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1073,6 +1073,7 @@ uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id) task_rq_unlock(rq, p, &rf); } +#ifdef CONFIG_UCLAMP_TASK_GROUP static inline void uclamp_update_active_tasks(struct cgroup_subsys_state *css, unsigned int clamps) @@ -1091,7 +1092,6 @@ uclamp_update_active_tasks(struct cgroup_subsys_state *css, css_task_iter_end(&it); } -#ifdef CONFIG_UCLAMP_TASK_GROUP static void cpu_util_update_eff(struct cgroup_subsys_state *css); static void uclamp_update_root_tg(void) { -- cgit v1.2.3 From 6e2df0581f569038719cf2bc2b3baa3fcc83cab4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Nov 2019 11:11:52 +0100 Subject: sched: Fix pick_next_task() vs 'change' pattern race Commit 67692435c411 ("sched: Rework pick_next_task() slow-path") inadvertently introduced a race because it changed a previously unexplored dependency between dropping the rq->lock and sched_class::put_prev_task().
The comments about dropping rq->lock, in for example newidle_balance(), only mentions the task being current and ->on_cpu being set. But when we look at the 'change' pattern (in for example sched_setnuma()): queued = task_on_rq_queued(p); /* p->on_rq == TASK_ON_RQ_QUEUED */ running = task_current(rq, p); /* rq->curr == p */ if (queued) dequeue_task(...); if (running) put_prev_task(...); /* change task properties */ if (queued) enqueue_task(...); if (running) set_next_task(...); It becomes obvious that if we do this after put_prev_task() has already been called on @p, things go sideways. This is exactly what the commit in question allows to happen when it does: prev->sched_class->put_prev_task(rq, prev, rf); if (!rq->nr_running) newidle_balance(rq, rf); The newidle_balance() call will drop rq->lock after we've called put_prev_task() and that allows the above 'change' pattern to interleave and mess up the state. Furthermore, it turns out we lost the RT-pull when we put the last DL task. Fix both problems by extracting the balancing from put_prev_task() and doing a multi-class balance() pass before put_prev_task(). 
Fixes: 67692435c411 ("sched: Rework pick_next_task() slow-path") Reported-by: Quentin Perret Signed-off-by: Peter Zijlstra (Intel) Tested-by: Quentin Perret Tested-by: Valentin Schneider --- kernel/sched/core.c | 21 +++++++++++++++------ kernel/sched/deadline.c | 40 ++++++++++++++++++++-------------------- kernel/sched/fair.c | 15 ++++++++++++--- kernel/sched/idle.c | 9 ++++++++- kernel/sched/rt.c | 37 +++++++++++++++++++------------------ kernel/sched/sched.h | 30 +++++++++++++++++++++++++++--- kernel/sched/stop_task.c | 18 +++++++++++------- 7 files changed, 112 insertions(+), 58 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index afd4d8028771..0f2eb3629070 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3929,13 +3929,22 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) } restart: +#ifdef CONFIG_SMP /* - * Ensure that we put DL/RT tasks before the pick loop, such that they - * can PULL higher prio tasks when we lower the RQ 'priority'. + * We must do the balancing pass before put_prev_task(), such + * that when we release the rq->lock the task is in the same + * state as before we took rq->lock. + * + * We can terminate the balance pass as soon as we know there is + * a runnable task of @class priority or higher.
*/ - prev->sched_class->put_prev_task(rq, prev, rf); - if (!rq->nr_running) - newidle_balance(rq, rf); + for_class_range(class, prev->sched_class, &idle_sched_class) { + if (class->balance(rq, prev, rf)) + break; + } +#endif + + put_prev_task(rq, prev); for_each_class(class) { p = class->pick_next_task(rq, NULL, NULL); @@ -6201,7 +6210,7 @@ static struct task_struct *__pick_migrate_task(struct rq *rq) for_each_class(class) { next = class->pick_next_task(rq, NULL, NULL); if (next) { - next->sched_class->put_prev_task(rq, next, NULL); + next->sched_class->put_prev_task(rq, next); return next; } } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 2dc48720f189..a8a08030a8f7 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1691,6 +1691,22 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) resched_curr(rq); } +static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf) +{ + if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) { + /* + * This is OK, because current is on_cpu, which avoids it being + * picked for load-balance and preemption/IRQs are still + * disabled avoiding further scheduler activity on it and we've + * not yet started the picking loop. 
+ */ + rq_unpin_lock(rq, rf); + pull_dl_task(rq); + rq_repin_lock(rq, rf); + } + + return sched_stop_runnable(rq) || sched_dl_runnable(rq); +} #endif /* CONFIG_SMP */ /* @@ -1758,45 +1774,28 @@ static struct task_struct * pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) { struct sched_dl_entity *dl_se; + struct dl_rq *dl_rq = &rq->dl; struct task_struct *p; - struct dl_rq *dl_rq; WARN_ON_ONCE(prev || rf); - dl_rq = &rq->dl; - - if (unlikely(!dl_rq->dl_nr_running)) + if (!sched_dl_runnable(rq)) return NULL; dl_se = pick_next_dl_entity(rq, dl_rq); BUG_ON(!dl_se); - p = dl_task_of(dl_se); - set_next_task_dl(rq, p); - return p; } -static void put_prev_task_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf) +static void put_prev_task_dl(struct rq *rq, struct task_struct *p) { update_curr_dl(rq); update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1); if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1) enqueue_pushable_dl_task(rq, p); - - if (rf && !on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) { - /* - * This is OK, because current is on_cpu, which avoids it being - * picked for load-balance and preemption/IRQs are still - * disabled avoiding further scheduler activity on it and we've - * not yet started the picking loop. 
- */ - rq_unpin_lock(rq, rf); - pull_dl_task(rq); - rq_repin_lock(rq, rf); - } } /* @@ -2442,6 +2441,7 @@ const struct sched_class dl_sched_class = { .set_next_task = set_next_task_dl, #ifdef CONFIG_SMP + .balance = balance_dl, .select_task_rq = select_task_rq_dl, .migrate_task_rq = migrate_task_rq_dl, .set_cpus_allowed = set_cpus_allowed_dl, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 682a754ea3e1..22a2fed29054 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6570,6 +6570,15 @@ static void task_dead_fair(struct task_struct *p) { remove_entity_load_avg(&p->se); } + +static int +balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) +{ + if (rq->nr_running) + return 1; + + return newidle_balance(rq, rf) != 0; +} #endif /* CONFIG_SMP */ static unsigned long wakeup_gran(struct sched_entity *se) @@ -6746,7 +6755,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf int new_tasks; again: - if (!cfs_rq->nr_running) + if (!sched_fair_runnable(rq)) goto idle; #ifdef CONFIG_FAIR_GROUP_SCHED @@ -6884,7 +6893,7 @@ idle: /* * Account for a descheduled task: */ -static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) +static void put_prev_task_fair(struct rq *rq, struct task_struct *prev) { struct sched_entity *se = &prev->se; struct cfs_rq *cfs_rq; @@ -10414,11 +10423,11 @@ const struct sched_class fair_sched_class = { .check_preempt_curr = check_preempt_wakeup, .pick_next_task = pick_next_task_fair, - .put_prev_task = put_prev_task_fair, .set_next_task = set_next_task_fair, #ifdef CONFIG_SMP + .balance = balance_fair, .select_task_rq = select_task_rq_fair, .migrate_task_rq = migrate_task_rq_fair, diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 8dad5aa600ea..f65ef1e2f204 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -365,6 +365,12 @@ select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags) { return task_cpu(p); 
/* IDLE tasks as never migrated */ } + +static int +balance_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) +{ + return WARN_ON_ONCE(1); +} #endif /* @@ -375,7 +381,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl resched_curr(rq); } -static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) +static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) { } @@ -460,6 +466,7 @@ const struct sched_class idle_sched_class = { .set_next_task = set_next_task_idle, #ifdef CONFIG_SMP + .balance = balance_idle, .select_task_rq = select_task_rq_idle, .set_cpus_allowed = set_cpus_allowed_common, #endif diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index ebaa4e619684..9b8adc01be3d 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1469,6 +1469,22 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) resched_curr(rq); } +static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf) +{ + if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) { + /* + * This is OK, because current is on_cpu, which avoids it being + * picked for load-balance and preemption/IRQs are still + * disabled avoiding further scheduler activity on it and we've + * not yet started the picking loop. 
+ */ + rq_unpin_lock(rq, rf); + pull_rt_task(rq); + rq_repin_lock(rq, rf); + } + + return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq); +} #endif /* CONFIG_SMP */ /* @@ -1552,21 +1568,18 @@ static struct task_struct * pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) { struct task_struct *p; - struct rt_rq *rt_rq = &rq->rt; WARN_ON_ONCE(prev || rf); - if (!rt_rq->rt_queued) + if (!sched_rt_runnable(rq)) return NULL; p = _pick_next_task_rt(rq); - set_next_task_rt(rq, p); - return p; } -static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf) +static void put_prev_task_rt(struct rq *rq, struct task_struct *p) { update_curr_rt(rq); @@ -1578,18 +1591,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct rq_fla */ if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); - - if (rf && !on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) { - /* - * This is OK, because current is on_cpu, which avoids it being - * picked for load-balance and preemption/IRQs are still - * disabled avoiding further scheduler activity on it and we've - * not yet started the picking loop. 
- */ - rq_unpin_lock(rq, rf); - pull_rt_task(rq); - rq_repin_lock(rq, rf); - } } #ifdef CONFIG_SMP @@ -2366,8 +2367,8 @@ const struct sched_class rt_sched_class = { .set_next_task = set_next_task_rt, #ifdef CONFIG_SMP + .balance = balance_rt, .select_task_rq = select_task_rq_rt, - .set_cpus_allowed = set_cpus_allowed_common, .rq_online = rq_online_rt, .rq_offline = rq_offline_rt, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0db2c1b3361e..c8870c5bd7df 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1727,10 +1727,11 @@ struct sched_class { struct task_struct * (*pick_next_task)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf); - void (*put_prev_task)(struct rq *rq, struct task_struct *p, struct rq_flags *rf); + void (*put_prev_task)(struct rq *rq, struct task_struct *p); void (*set_next_task)(struct rq *rq, struct task_struct *p); #ifdef CONFIG_SMP + int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf); int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags); void (*migrate_task_rq)(struct task_struct *p, int new_cpu); @@ -1773,7 +1774,7 @@ struct sched_class { static inline void put_prev_task(struct rq *rq, struct task_struct *prev) { WARN_ON_ONCE(rq->curr != prev); - prev->sched_class->put_prev_task(rq, prev, NULL); + prev->sched_class->put_prev_task(rq, prev); } static inline void set_next_task(struct rq *rq, struct task_struct *next) @@ -1787,8 +1788,12 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next) #else #define sched_class_highest (&dl_sched_class) #endif + +#define for_class_range(class, _from, _to) \ + for (class = (_from); class != (_to); class = class->next) + #define for_each_class(class) \ - for (class = sched_class_highest; class; class = class->next) + for_class_range(class, sched_class_highest, NULL) extern const struct sched_class stop_sched_class; extern const struct sched_class dl_sched_class; @@ -1796,6 +1801,25 @@ extern 
const struct sched_class rt_sched_class; extern const struct sched_class fair_sched_class; extern const struct sched_class idle_sched_class; +static inline bool sched_stop_runnable(struct rq *rq) +{ + return rq->stop && task_on_rq_queued(rq->stop); +} + +static inline bool sched_dl_runnable(struct rq *rq) +{ + return rq->dl.dl_nr_running > 0; +} + +static inline bool sched_rt_runnable(struct rq *rq) +{ + return rq->rt.rt_queued > 0; +} + +static inline bool sched_fair_runnable(struct rq *rq) +{ + return rq->cfs.nr_running > 0; +} #ifdef CONFIG_SMP diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index 7e1cee4e65b2..c0640739e05e 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -15,6 +15,12 @@ select_task_rq_stop(struct task_struct *p, int cpu, int sd_flag, int flags) { return task_cpu(p); /* stop tasks as never migrate */ } + +static int +balance_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) +{ + return sched_stop_runnable(rq); +} #endif /* CONFIG_SMP */ static void @@ -31,16 +37,13 @@ static void set_next_task_stop(struct rq *rq, struct task_struct *stop) static struct task_struct * pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) { - struct task_struct *stop = rq->stop; - WARN_ON_ONCE(prev || rf); - if (!stop || !task_on_rq_queued(stop)) + if (!sched_stop_runnable(rq)) return NULL; - set_next_task_stop(rq, stop); - - return stop; + set_next_task_stop(rq, rq->stop); + return rq->stop; } static void @@ -60,7 +63,7 @@ static void yield_task_stop(struct rq *rq) BUG(); /* the stop task should never yield, its pointless. 
*/ } -static void put_prev_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) +static void put_prev_task_stop(struct rq *rq, struct task_struct *prev) { struct task_struct *curr = rq->curr; u64 delta_exec; @@ -129,6 +132,7 @@ const struct sched_class stop_sched_class = { .set_next_task = set_next_task_stop, #ifdef CONFIG_SMP + .balance = balance_stop, .select_task_rq = select_task_rq_stop, .set_cpus_allowed = set_cpus_allowed_common, #endif -- cgit v1.2.3 From 1b53d64435d56902fc234ff2507142d971a09687 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 20:08:19 -0800 Subject: net: fix data-race in neigh_event_send() KCSAN reported the following data-race [1] The fix will also prevent the compiler from optimizing out the condition. [1] BUG: KCSAN: data-race in neigh_resolve_output / neigh_resolve_output write to 0xffff8880a41dba78 of 8 bytes by interrupt on cpu 1: neigh_event_send include/net/neighbour.h:443 [inline] neigh_resolve_output+0x78/0x480 net/core/neighbour.c:1474 neigh_output include/net/neighbour.h:511 [inline] ip_finish_output2+0x4af/0xe40 net/ipv4/ip_output.c:228 __ip_finish_output net/ipv4/ip_output.c:308 [inline] __ip_finish_output+0x23a/0x490 net/ipv4/ip_output.c:290 ip_finish_output+0x41/0x160 net/ipv4/ip_output.c:318 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip_output+0xdf/0x210 net/ipv4/ip_output.c:432 dst_output include/net/dst.h:436 [inline] ip_local_out+0x74/0x90 net/ipv4/ip_output.c:125 __ip_queue_xmit+0x3a8/0xa40 net/ipv4/ip_output.c:532 ip_queue_xmit+0x45/0x60 include/net/ip.h:237 __tcp_transmit_skb+0xe81/0x1d60 net/ipv4/tcp_output.c:1169 tcp_transmit_skb net/ipv4/tcp_output.c:1185 [inline] __tcp_retransmit_skb+0x4bd/0x15f0 net/ipv4/tcp_output.c:2976 tcp_retransmit_skb+0x36/0x1a0 net/ipv4/tcp_output.c:2999 tcp_retransmit_timer+0x719/0x16d0 net/ipv4/tcp_timer.c:515 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:598 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:618 read to 
0xffff8880a41dba78 of 8 bytes by interrupt on cpu 0: neigh_event_send include/net/neighbour.h:442 [inline] neigh_resolve_output+0x57/0x480 net/core/neighbour.c:1474 neigh_output include/net/neighbour.h:511 [inline] ip_finish_output2+0x4af/0xe40 net/ipv4/ip_output.c:228 __ip_finish_output net/ipv4/ip_output.c:308 [inline] __ip_finish_output+0x23a/0x490 net/ipv4/ip_output.c:290 ip_finish_output+0x41/0x160 net/ipv4/ip_output.c:318 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip_output+0xdf/0x210 net/ipv4/ip_output.c:432 dst_output include/net/dst.h:436 [inline] ip_local_out+0x74/0x90 net/ipv4/ip_output.c:125 __ip_queue_xmit+0x3a8/0xa40 net/ipv4/ip_output.c:532 ip_queue_xmit+0x45/0x60 include/net/ip.h:237 __tcp_transmit_skb+0xe81/0x1d60 net/ipv4/tcp_output.c:1169 tcp_transmit_skb net/ipv4/tcp_output.c:1185 [inline] __tcp_retransmit_skb+0x4bd/0x15f0 net/ipv4/tcp_output.c:2976 tcp_retransmit_skb+0x36/0x1a0 net/ipv4/tcp_output.c:2999 tcp_retransmit_timer+0x719/0x16d0 net/ipv4/tcp_timer.c:515 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:598 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. 
Miller --- include/net/neighbour.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 50a67bd6a434..b8452cc0e059 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -439,8 +439,8 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { unsigned long now = jiffies; - if (neigh->used != now) - neigh->used = now; + if (READ_ONCE(neigh->used) != now) + WRITE_ONCE(neigh->used, now); if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) return __neigh_event_send(neigh, skb); return 0; -- cgit v1.2.3 From deabc87111c690097c03765ea017cd500f7376fc Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Fri, 8 Nov 2019 02:42:30 -0800 Subject: qede: fix NULL pointer deref in __qede_remove() Rebooting the system with SR-IOV VFs enabled leads to the crash below, due to __qede_remove() being invoked twice on the VF devices (first from the .shutdown() flow of the VF itself and then from the PF's .shutdown() flow executing pci_disable_sriov()). This patch adds a safeguard in the __qede_remove() flow to fix this, so that the driver doesn't attempt to remove "already removed" devices.
[ 194.360134] BUG: unable to handle kernel NULL pointer dereference at 00000000000008dc [ 194.360227] IP: [] __qede_remove+0x24/0x130 [qede] [ 194.360304] PGD 0 [ 194.360325] Oops: 0000 [#1] SMP [ 194.360360] Modules linked in: tcp_lp fuse tun bridge stp llc devlink bonding ip_set nfnetlink ib_isert iscsi_target_mod ib_srpt target_core_mod ib_srp scsi_transport_srp scsi_tgt ib_ipoib ib_umad rpcrdma sunrpc rdma_ucm ib_uverbs ib_iser rdma_cm iw_cm ib_cm libiscsi scsi_transport_iscsi dell_smbios iTCO_wdt iTCO_vendor_support dell_wmi_descriptor dcdbas vfat fat pcc_cpufreq skx_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd qedr ib_core pcspkr ses enclosure joydev ipmi_ssif sg i2c_i801 lpc_ich mei_me mei wmi ipmi_si ipmi_devintf ipmi_msghandler tpm_crb acpi_pad acpi_power_meter xfs libcrc32c sd_mod crc_t10dif crct10dif_generic crct10dif_pclmul crct10dif_common crc32c_intel mgag200 [ 194.361044] qede i2c_algo_bit drm_kms_helper qed syscopyarea sysfillrect nvme sysimgblt fb_sys_fops ttm nvme_core mpt3sas crc8 ptp drm pps_core ahci raid_class scsi_transport_sas libahci libata drm_panel_orientation_quirks nfit libnvdimm dm_mirror dm_region_hash dm_log dm_mod [last unloaded: ip_tables] [ 194.361297] CPU: 51 PID: 7996 Comm: reboot Kdump: loaded Not tainted 3.10.0-1062.el7.x86_64 #1 [ 194.361359] Hardware name: Dell Inc. 
PowerEdge MX840c/0740HW, BIOS 2.4.6 10/15/2019 [ 194.361412] task: ffff9cea9b360000 ti: ffff9ceabebdc000 task.ti: ffff9ceabebdc000 [ 194.361463] RIP: 0010:[] [] __qede_remove+0x24/0x130 [qede] [ 194.361534] RSP: 0018:ffff9ceabebdfac0 EFLAGS: 00010282 [ 194.361570] RAX: 0000000000000000 RBX: ffff9cd013846098 RCX: 0000000000000000 [ 194.361621] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9cd013846098 [ 194.361668] RBP: ffff9ceabebdfae8 R08: 0000000000000000 R09: 0000000000000000 [ 194.361715] R10: 00000000bfe14201 R11: ffff9ceabfe141e0 R12: 0000000000000000 [ 194.361762] R13: ffff9cd013846098 R14: 0000000000000000 R15: ffff9ceab5e48000 [ 194.361810] FS: 00007f799c02d880(0000) GS:ffff9ceacb0c0000(0000) knlGS:0000000000000000 [ 194.361865] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 194.361903] CR2: 00000000000008dc CR3: 0000001bdac76000 CR4: 00000000007607e0 [ 194.361953] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 194.362002] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 194.362051] PKRU: 55555554 [ 194.362073] Call Trace: [ 194.362109] [] qede_remove+0x10/0x20 [qede] [ 194.362180] [] pci_device_remove+0x3e/0xc0 [ 194.362240] [] __device_release_driver+0x82/0xf0 [ 194.362285] [] device_release_driver+0x23/0x30 [ 194.362343] [] pci_stop_bus_device+0x84/0xa0 [ 194.362388] [] pci_stop_and_remove_bus_device+0x12/0x20 [ 194.362450] [] pci_iov_remove_virtfn+0xaf/0x160 [ 194.362496] [] sriov_disable+0x3c/0xf0 [ 194.362534] [] pci_disable_sriov+0x23/0x30 [ 194.362599] [] qed_sriov_disable+0x5e3/0x650 [qed] [ 194.362658] [] ? kfree+0x106/0x140 [ 194.362709] [] ? qed_free_stream_mem+0x70/0x90 [qed] [ 194.362754] [] ? 
kfree+0x106/0x140 [ 194.362803] [] qed_slowpath_stop+0x1a9/0x1d0 [qed] [ 194.362854] [] __qede_remove+0xae/0x130 [qede] [ 194.362904] [] qede_shutdown+0x10/0x20 [qede] [ 194.362956] [] pci_device_shutdown+0x3a/0x60 [ 194.363010] [] device_shutdown+0xfb/0x1f0 [ 194.363066] [] kernel_restart_prepare+0x36/0x40 [ 194.363107] [] kernel_restart+0x12/0x60 [ 194.363146] [] SYSC_reboot+0x229/0x260 [ 194.363196] [] ? handle_mm_fault+0x39d/0x9b0 [ 194.363253] [] ? __switch_to+0x151/0x580 [ 194.363304] [] ? __schedule+0x448/0x9c0 [ 194.363343] [] SyS_reboot+0xe/0x10 [ 194.363387] [] system_call_fastpath+0x25/0x2a [ 194.363430] Code: f9 e9 37 ff ff ff 90 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 41 55 4c 8d af 98 00 00 00 41 54 4c 89 ef 41 89 f4 53 e8 4c e4 55 f9 <80> b8 dc 08 00 00 01 48 89 c3 4c 8d b8 c0 08 00 00 4c 8b b0 c0 [ 194.363712] RIP [] __qede_remove+0x24/0x130 [qede] [ 194.363764] RSP [ 194.363791] CR2: 00000000000008dc Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: Sudarsana Kalluru Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qede/qede_main.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 8d1c208f778f..a220cc7c947a 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1208,8 +1208,16 @@ enum qede_remove_mode { static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) { struct net_device *ndev = pci_get_drvdata(pdev); - struct qede_dev *edev = netdev_priv(ndev); - struct qed_dev *cdev = edev->cdev; + struct qede_dev *edev; + struct qed_dev *cdev; + + if (!ndev) { + dev_info(&pdev->dev, "Device has already been removed\n"); + return; + } + + edev = netdev_priv(ndev); + cdev = edev->cdev; DP_INFO(edev, "Starting qede_remove\n"); -- cgit v1.2.3 From 615457a226f042bffc3a1532afb244cab37460d4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 1 Nov 2019 14:00:17 +0000 Subject: ice: fix potential infinite loop because loop counter being too small Currently the for-loop counter i is a u8 however it is being checked against a maximum value hw->num_tx_sched_layers which is a u16. Hence there is a potential wrap-around of counter i back to zero if hw->num_tx_sched_layers is greater than 255. Fix this by making i a u16. 
Addresses-Coverity: ("Infinite loop") Fixes: b36c598c999c ("ice: Updates to Tx scheduler code") Signed-off-by: Colin Ian King Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index fc624b73d05d..2fde9653a608 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -1036,7 +1036,7 @@ enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw) struct ice_aqc_query_txsched_res_resp *buf; enum ice_status status = 0; __le16 max_sibl; - u8 i; + u16 i; if (hw->layer_info) return status; -- cgit v1.2.3 From 4eda4e0096842764d725bcfd77471a419832b074 Mon Sep 17 00:00:00 2001 From: Nicholas Nunley Date: Tue, 5 Nov 2019 04:22:14 -0800 Subject: iavf: initialize ITRN registers with correct values Since commit 92418fb14750 ("i40e/i40evf: Use usec value instead of reg value for ITR defines") the driver tracks the interrupt throttling intervals in single usec units, although the actual ITRN registers are programmed in 2 usec units. Most register programming flows in the driver correctly handle the conversion, although it is currently not applied when the registers are initialized to their default values. Most of the time this doesn't present a problem since the default values are usually immediately overwritten through the standard adaptive throttling mechanism, or updated manually by the user, but if adaptive throttling is disabled and the interval values are left alone then the incorrect value will persist. Since the intended default interval of 50 usecs (vs. 100 usecs as programmed) performs better for most traffic workloads, this can lead to performance regressions. This patch adds the correct conversion when writing the initial values to the ITRN registers. 
Signed-off-by: Nicholas Nunley Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/iavf/iavf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 8f310e520b06..821987da5698 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -314,7 +314,7 @@ iavf_map_vector_to_rxq(struct iavf_adapter *adapter, int v_idx, int r_idx) q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); q_vector->ring_mask |= BIT(r_idx); wr32(hw, IAVF_VFINT_ITRN1(IAVF_RX_ITR, q_vector->reg_idx), - q_vector->rx.current_itr); + q_vector->rx.current_itr >> 1); q_vector->rx.current_itr = q_vector->rx.target_itr; } @@ -340,7 +340,7 @@ iavf_map_vector_to_txq(struct iavf_adapter *adapter, int v_idx, int t_idx) q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); q_vector->num_ringpairs++; wr32(hw, IAVF_VFINT_ITRN1(IAVF_TX_ITR, q_vector->reg_idx), - q_vector->tx.target_itr); + q_vector->tx.target_itr >> 1); q_vector->tx.current_itr = q_vector->tx.target_itr; } -- cgit v1.2.3 From 4c9da6f2b8a029052c75bd4a61ae229135831177 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Wed, 6 Nov 2019 06:24:04 -0800 Subject: i40e: Fix for ethtool -m issue on X722 NIC This patch contains a fix for a problem with the command 'ethtool -m DEVNAME', which breaks the functionality of 'ethtool DEVNAME' when called on an X722 NIC. Updating the link phy_types is now disallowed on X722 NICs, because the correct value currently cannot be obtained from FW. Previously, the wrong value returned by FW was used, and that was the root cause of the incorrect output of the 'ethtool DEVNAME' command. Signed-off-by: Arkadiusz Kubalewski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index d37c6e0e5f08..7560f06768e0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1876,7 +1876,8 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw, hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE) hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU; - if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE && + hw->mac.type != I40E_MAC_X722) { __le32 tmp; memcpy(&tmp, resp->link_type, sizeof(tmp)); -- cgit v1.2.3 From 6acab13bdf2adda250b2a4d648fd4b49f1d0f14c Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 6 Nov 2019 09:18:23 -0800 Subject: igb/igc: use ktime accessors for skb->tstamp When implementing launch time support in the igb and igc drivers, the skb->tstamp value is assumed to be a s64, but it's declared as a ktime_t value. Although ktime_t is typedef'd to s64 it wasn't always, and the kernel provides accessors for ktime_t values. Use the ktime_to_timespec64 and ktime_set accessors instead of directly assuming that the variable is always an s64. This improves portability if the code is ever moved to another kernel version, or if the definition of ktime_t ever changes again in the future. 
Signed-off-by: Jacob Keller Acked-by: Vinicius Costa Gomes Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 2 ++ drivers/net/ethernet/intel/igb/igb_main.c | 4 ++-- drivers/net/ethernet/intel/igc/igc_main.c | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 530613f31527..69a2daaca5c5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -20,6 +20,8 @@ /* API version 1.7 implements additional link and PHY-specific APIs */ #define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007 +/* API version 1.9 for X722 implements additional link and PHY-specific APIs */ +#define I40E_MINOR_VER_GET_LINK_INFO_X722 0x0009 /* API version 1.6 for X722 devices adds ability to stop FW LLDP agent */ #define I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722 0x0006 diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 9148c62d9ac5..ed7e667d7eb2 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5675,8 +5675,8 @@ static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, * should have been handled by the upper layers. */ if (tx_ring->launchtime_enable) { - ts = ns_to_timespec64(first->skb->tstamp); - first->skb->tstamp = 0; + ts = ktime_to_timespec64(first->skb->tstamp); + first->skb->tstamp = ktime_set(0, 0); context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32); } else { context_desc->seqnum_seed = 0; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 8e424dfab12e..24888676f69b 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -824,8 +824,8 @@ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring, * should have been handled by the upper layers. 
*/ if (tx_ring->launchtime_enable) { - ts = ns_to_timespec64(first->skb->tstamp); - first->skb->tstamp = 0; + ts = ktime_to_timespec64(first->skb->tstamp); + first->skb->tstamp = ktime_set(0, 0); context_desc->launch_time = cpu_to_le32(ts.tv_nsec / 32); } else { context_desc->launch_time = 0; -- cgit v1.2.3 From 705639572e8c096b12fcbff64a5c67db5b2d6ac0 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 8 Nov 2019 20:58:09 +0100 Subject: i40e: need_wakeup flag might not be set for Tx The need_wakeup flag for Tx might not be set for AF_XDP sockets that are only used to send packets. This happens if there is at least one outstanding packet that has not been completed by the hardware and we get that corresponding completion (which will not generate an interrupt since interrupts are disabled in the napi poll loop) between the time we stopped processing the Tx completions and interrupts are enabled again. In this case, the need_wakeup flag will have been cleared at the end of the Tx completion processing as we believe we will get an interrupt from the outstanding completion at a later point in time. But if this completion interrupt occurs before interrupts are enabled, we lose it and should at that point really have set the need_wakeup flag since there are no more outstanding completions that can generate an interrupt to continue the processing. When this happens, user space will see a Tx queue need_wakeup of 0 and skip issuing a syscall, which means we will never get into the Tx processing again and we have a deadlock. This patch introduces a quick fix for this issue by just setting the need_wakeup flag for Tx to 1 all the time. I am working on a proper fix for this that will toggle the flag appropriately, but it is more challenging than I anticipated and I am afraid that this patch will not be completed before the merge window closes, therefore this easier fix for now. This fix has a negative performance impact in the range of 0% to 4%.
Towards the higher end of the scale if you have driver and application on the same core and issue a lot of packets, and towards no negative impact if you use two cores, lower transmission speeds and/or a workload that also receives packets. Signed-off-by: Magnus Karlsson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index a05dfecdd9b4..d07e1a890428 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -689,8 +689,6 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) i40e_xdp_ring_update_tail(xdp_ring); xsk_umem_consume_tx_done(xdp_ring->xsk_umem); - if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem)) - xsk_clear_tx_need_wakeup(xdp_ring->xsk_umem); } return !!budget && work_done; @@ -769,12 +767,8 @@ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, i40e_update_tx_stats(tx_ring, completed_frames, total_bytes); out_xmit: - if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) { - if (tx_ring->next_to_clean == tx_ring->next_to_use) - xsk_set_tx_need_wakeup(tx_ring->xsk_umem); - else - xsk_clear_tx_need_wakeup(tx_ring->xsk_umem); - } + if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) + xsk_set_tx_need_wakeup(tx_ring->xsk_umem); xmit_done = i40e_xmit_zc(tx_ring, budget); -- cgit v1.2.3 From 0843aa8f12edbd60e64e71f854eab2f452010eaa Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 8 Nov 2019 20:58:10 +0100 Subject: ixgbe: need_wakeup flag might not be set for Tx The need_wakeup flag for Tx might not be set for AF_XDP sockets that are only used to send packets. 
This happens if there is at least one outstanding packet that has not been completed by the hardware and we get that corresponding completion (which will not generate an interrupt since interrupts are disabled in the napi poll loop) between the time we stopped processing the Tx completions and interrupts are enabled again. In this case, the need_wakeup flag will have been cleared at the end of the Tx completion processing as we believe we will get an interrupt from the outstanding completion at a later point in time. But if this completion interrupt occurs before interrupts are enabled, we lose it and should at that point really have set the need_wakeup flag since there are no more outstanding completions that can generate an interrupt to continue the processing. When this happens, user space will see a Tx queue need_wakeup of 0 and skip issuing a syscall, which means we will never get into the Tx processing again and we have a deadlock. This patch introduces a quick fix for this issue by just setting the need_wakeup flag for Tx to 1 all the time. I am working on a proper fix for this that will toggle the flag appropriately, but it is more challenging than I anticipated and I am afraid that this patch will not be completed before the merge window closes, therefore this easier fix for now. This fix has a negative performance impact in the range of 0% to 4%. Towards the higher end of the scale if you have driver and application on the same core and issue a lot of packets, and towards no negative impact if you use two cores, lower transmission speeds and/or a workload that also receives packets.
Signed-off-by: Magnus Karlsson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 100ac89b345d..d6feaacfbf89 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -622,8 +622,6 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) if (tx_desc) { ixgbe_xdp_ring_update_tail(xdp_ring); xsk_umem_consume_tx_done(xdp_ring->xsk_umem); - if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem)) - xsk_clear_tx_need_wakeup(xdp_ring->xsk_umem); } return !!budget && work_done; @@ -691,12 +689,8 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, if (xsk_frames) xsk_umem_complete_tx(umem, xsk_frames); - if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) { - if (tx_ring->next_to_clean == tx_ring->next_to_use) - xsk_set_tx_need_wakeup(tx_ring->xsk_umem); - else - xsk_clear_tx_need_wakeup(tx_ring->xsk_umem); - } + if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) + xsk_set_tx_need_wakeup(tx_ring->xsk_umem); return ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit); } -- cgit v1.2.3 From 820b7c717f09ea5f024f2185e69e2847fd2851dd Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Sun, 10 Nov 2019 16:27:54 +0000 Subject: lib: Remove select of nonexistent GENERIC_IO config option GENERIC_IO was removed but is still selected by lib/Kconfig. This patch finishes the cleanup.
Fixes: 9de8da47742b ("kconfig: kill off GENERIC_IO option") Acked-by: Rob Herring Signed-off-by: Corentin Labbe Signed-off-by: Linus Torvalds --- lib/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/Kconfig b/lib/Kconfig index 183f92a297ca..3321d04dfa5a 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -447,7 +447,6 @@ config ASSOCIATIVE_ARRAY config HAS_IOMEM bool depends on !NO_IOMEM - select GENERIC_IO default y config HAS_IOPORT_MAP -- cgit v1.2.3 From 31f4f5b495a62c9a8b15b1c3581acd5efeb9af8c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 10 Nov 2019 16:17:15 -0800 Subject: Linux 5.4-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b37d0e8fc61d..1d5298356ea8 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- cgit v1.2.3