From 747e5a5ff2a2ae84715c33d6679ac3c5220a3aec Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 24 Nov 2016 14:40:50 +0000 Subject: drm: hdlcd: Fix cleanup order If hdlcd_drm_bind() fails at drm_fbdev_cma_init(), its cleanup will call drm_mode_config_cleanup() as if to balance drm_mode_config_reset(). The net result is that drm_connector_cleanup() will clean up the active connectors long before component_unbind_all() gets called, so when the connector later tries to clean up itself after being unbound, Bad Things can happen: [ 4.121888] Unable to handle kernel NULL pointer dereference at virtual address 00000000 [ 4.129951] pgd = ffffff80091e0000 [ 4.133345] [00000000] *pgd=00000009ffffe003, *pud=00000009ffffe003, *pmd=0000000000000000 [ 4.141613] Internal error: Oops: 96000005 [#1] PREEMPT SMP [ 4.147144] Modules linked in: [ 4.150188] CPU: 0 PID: 122 Comm: kworker/u12:2 Not tainted 4.8.0-rc2+ #989 [ 4.157097] Hardware name: ARM Juno development board (r1) (DT) [ 4.162981] Workqueue: deferwq deferred_probe_work_func [ 4.168173] task: ffffffc975d93200 task.stack: ffffffc975dac000 [ 4.174055] PC is at drm_connector_cleanup+0x58/0x1c0 [ 4.179074] LR is at tda998x_unbind+0x24/0x40 [ 4.183401] pc : [] lr : [] pstate: 00000045 [ 4.190750] sp : ffffffc975dafa10 [ 4.194041] x29: ffffffc975dafa10 x28: ffffffc9768152a8 [ 4.199325] x27: ffffffc97ff46450 x26: ffffff8008d99000 [ 4.204608] x25: dead000000000100 x24: dead000000000200 [ 4.209891] x23: ffffffc976bf91e8 x22: 0000000000000000 [ 4.215172] x21: ffffffc976bf9170 x20: ffffffc976bf9170 [ 4.220454] x19: ffffffc976bf9018 x18: 0000000000000000 [ 4.225737] x17: 0000000074ce71ee x16: 000000008ff5d35f [ 4.231019] x15: ffffffc97681e91c x14: ffffffffffffffff [ 4.236301] x13: ffffffc97681e185 x12: 0000000000000038 [ 4.241583] x11: 0101010101010101 x10: 0000000000000000 [ 4.246866] x9 : 0000000040000000 x8 : 0000000000210d00 [ 4.252148] x7 : ffffffc97fea8c00 x6 : 000000000000001b [ 4.257430] x5 : ffffff80084b7b8c x4 : 0000000000000080 [ 4.262712] x3 : ffffff8008504128 x2 : ffffffc975df3800 [ 4.267993] x1 : 0000000000000000 x0 : 0000000000000000 ... [ 4.750937] [] drm_connector_cleanup+0x58/0x1c0 [ 4.756990] [] tda998x_unbind+0x24/0x40 [ 4.762354] [] component_unbind.isra.4+0x28/0x50 [ 4.768492] [] component_unbind_all+0xcc/0xd8 [ 4.774373] [] hdlcd_drm_bind+0x234/0x418 [ 4.779909] [] try_to_bring_up_master+0x140/0x1a0 [ 4.786133] [] component_add+0x98/0x170 [ 4.791496] [] tda998x_probe+0x18/0x20 [ 4.796774] [] i2c_device_probe+0x164/0x258 [ 4.802481] [] driver_probe_device+0x204/0x2b0 [ 4.808447] [] __device_attach_driver+0x9c/0xf8 [ 4.814498] [] bus_for_each_drv+0x58/0x98 [ 4.820033] [] __device_attach+0xc4/0x138 [ 4.825567] [] device_initial_probe+0x10/0x18 [ 4.831446] [] bus_probe_device+0x94/0xa0 [ 4.836981] [] deferred_probe_work_func+0x78/0xb0 [ 4.843207] [] process_one_work+0x118/0x378 [ 4.848914] [] worker_thread+0x48/0x498 [ 4.854276] [] kthread+0xd0/0xe8 [ 4.859036] [] ret_from_fork+0x10/0x40 [ 4.864314] Code: f2fbd5b9 f2fbd5b8 f8478ee0 eb17001f (f9400013) [ 4.870472] ---[ end trace a643cfe4ce1d838b ]--- Fix this by moving the drm_mode_config_cleanup() much later such that it correctly balances drm_mode_config_init(). Suggested-by: Russell King Signed-off-by: Robin Murphy Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/hdlcd_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c index fb6a418ce6be..e138fb51e8ce 100644 --- a/drivers/gpu/drm/arm/hdlcd_drv.c +++ b/drivers/gpu/drm/arm/hdlcd_drv.c @@ -375,7 +375,6 @@ static int hdlcd_drm_bind(struct device *dev) err_fbdev: drm_kms_helper_poll_fini(drm); - drm_mode_config_cleanup(drm); drm_vblank_cleanup(drm); err_vblank: pm_runtime_disable(drm->dev); @@ -387,6 +386,7 @@ err_unload: drm_irq_uninstall(drm); of_reserved_mem_device_release(drm->dev); err_free: + drm_mode_config_cleanup(drm); dev_set_drvdata(dev, NULL); drm_dev_unref(drm); -- cgit v1.2.3 From 10c77dba40ff58fc03587b3b60725bb7fd723183 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 16 Nov 2016 18:27:56 +0000 Subject: powerpc/boot: Fix build failure in 32-bit boot wrapper OPAL is not callable from 32-bit mode and the assembly code for it may not even build (depending on how binutils was configured). References: https://buildd.debian.org/status/fetch.php?pkg=linux&arch=powerpcspe&ver=4.8.7-1&stamp=1479203712 Fixes: 656ad58ef19e ("powerpc/boot: Add OPAL console to epapr wrappers") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Ben Hutchings Signed-off-by: Michael Ellerman --- arch/powerpc/boot/Makefile | 3 ++- arch/powerpc/boot/opal.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index eae2dc8bc218..9d47f2efa830 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -100,7 +100,8 @@ src-wlib-y := string.S crt0.S crtsavres.S stdio.c decompress.c main.c \ ns16550.c serial.c simple_alloc.c div64.S util.S \ elf_util.c $(zlib-y) devtree.c stdlib.c \ oflib.c ofconsole.c cuboot.c mpsc.c cpm-serial.c \ - uartlite.c mpc52xx-psc.c opal.c opal-calls.S + uartlite.c mpc52xx-psc.c opal.c +src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c fsl-soc.c diff --git a/arch/powerpc/boot/opal.c b/arch/powerpc/boot/opal.c index d7b4fd47eb44..0272570d02de 100644 --- a/arch/powerpc/boot/opal.c +++ b/arch/powerpc/boot/opal.c @@ -13,7 +13,7 @@ #include #include "../include/asm/opal-api.h" -#ifdef __powerpc64__ +#ifdef CONFIG_PPC64_BOOT_WRAPPER /* Global OPAL struct used by opal-call.S */ struct opal { -- cgit v1.2.3 From b64268d8a3f623c9b88676ad3dfacc95cfcfc62f Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 24 Nov 2016 13:33:47 +0800 Subject: drm/amd/powerplay: initialize the soft_regs offset in struct smu7_hwmgr This could lead to mclk dpm problems on some boards. Signed-off-by: Rex Zhu Ack-by: Tom St Denis Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c index 4ccc0b72324d..71bb2f8dc157 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c @@ -2214,6 +2214,7 @@ uint32_t polaris10_get_mac_definition(uint32_t value) int polaris10_process_firmware_header(struct pp_hwmgr *hwmgr) { struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smumgr->backend); + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); uint32_t tmp; int result; bool error = false; @@ -2233,8 +2234,10 @@ int polaris10_process_firmware_header(struct pp_hwmgr *hwmgr) offsetof(SMU74_Firmware_Header, SoftRegisters), &tmp, SMC_RAM_END); - if (!result) + if (!result) { + data->soft_regs_start = tmp; smu_data->smu7_data.soft_regs_start = tmp; + } error |= (0 != result); -- cgit v1.2.3 From dd7b2f035ec41a409f7a7cec7aabc0ec0eacf476 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 29 Nov 2016 13:13:46 +1100 Subject: powerpc/mm: Fix lazy icache flush on pre-POWER5 On 64-bit CPUs with no-execute support and non-snooping icache, such as 970 or POWER4, we have a software mechanism to ensure coherency of the cache (using exec faults when needed). This was broken due to a logic error when the code was rewritten from assembly to C, previously the assembly code did: BEGIN_FTR_SECTION mr r4,r30 mr r5,r7 bl hash_page_do_lazy_icache END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) Which tests that: (cpu_features & (NOEXECUTE | COHERENT_ICACHE)) == NOEXECUTE Which says that the current cpu does have NOEXECUTE, but does not have COHERENT_ICACHE. Fixes: 91f1da99792a ("powerpc/mm: Convert 4k hash insert to C") Fixes: 89ff725051d1 ("powerpc/mm: Convert __hash_page_64K to C") Fixes: a43c0eb8364c ("powerpc/mm: Convert 4k insert from asm to C") Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Aneesh Kumar K.V [mpe: Change log verbosification] Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash64_4k.c | 2 +- arch/powerpc/mm/hash64_64k.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index 42c702b3be1f..6fa450c12d6d 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, */ rflags = htab_convert_pte_flags(new_pte); - if (!cpu_has_feature(CPU_FTR_NOEXECUTE) && + if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 3bbbea07378c..1a68cb19b0e3 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -87,7 +87,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, subpg_pte = new_pte & ~subpg_prot; rflags = htab_convert_pte_flags(subpg_pte); - if (!cpu_has_feature(CPU_FTR_NOEXECUTE) && + if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { /* @@ -258,7 +258,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, rflags = htab_convert_pte_flags(new_pte); - if (!cpu_has_feature(CPU_FTR_NOEXECUTE) && + if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); -- cgit v1.2.3 From 7ac33e47d5769632010e537964c7e45498f8dc26 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Sat, 26 Nov 2016 15:05:01 +0100 Subject: drm/amdgpu: fix check for port PM availability The ATPX method does not always exist on the dGPU, it may be located at the iGPU. The parent device of the iGPU is the root port for which bridge_d3 is false. This accidentally enables the legacy PM method which conflicts with port PM and prevented the dGPU from powering on. Fixes: 1db4496f167b ("drm/amdgpu: fix power state when port pm is unavailable") Reported-and-tested-by: Mike Lothian Signed-off-by: Peter Wu Signed-off-by: Alex Deucher Cc: # 4.8+ --- drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 02ca5dd978f6..6c343a933182 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -485,7 +485,6 @@ static int amdgpu_atpx_power_state(enum vga_switcheroo_client_id id, */ static bool amdgpu_atpx_pci_probe_handle(struct pci_dev *pdev) { - struct pci_dev *parent_pdev = pci_upstream_bridge(pdev); acpi_handle dhandle, atpx_handle; acpi_status status; @@ -500,7 +499,6 @@ static bool amdgpu_atpx_pci_probe_handle(struct pci_dev *pdev) } amdgpu_atpx_priv.dhandle = dhandle; amdgpu_atpx_priv.atpx.handle = atpx_handle; - amdgpu_atpx_priv.bridge_pm_usable = parent_pdev && parent_pdev->bridge_d3; return true; } @@ -562,17 +560,25 @@ static bool amdgpu_atpx_detect(void) struct pci_dev *pdev = NULL; bool has_atpx = false; int vga_count = 0; + bool d3_supported = false; + struct pci_dev *parent_pdev; while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { vga_count++; has_atpx |= (amdgpu_atpx_pci_probe_handle(pdev) == true); + + parent_pdev = pci_upstream_bridge(pdev); + d3_supported |= parent_pdev && parent_pdev->bridge_d3; } while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) { vga_count++; has_atpx |= (amdgpu_atpx_pci_probe_handle(pdev) == true); + + parent_pdev = pci_upstream_bridge(pdev); + d3_supported |= parent_pdev && parent_pdev->bridge_d3; } if (has_atpx && vga_count == 2) { @@ -580,6 +586,7 @@ static bool amdgpu_atpx_detect(void) printk(KERN_INFO "vga_switcheroo: detected switching method %s handle\n", acpi_method_name); amdgpu_atpx_priv.atpx_detected = true; + amdgpu_atpx_priv.bridge_pm_usable = d3_supported; amdgpu_atpx_init(); return true; } -- cgit v1.2.3 From bcfdd5d5105087e6f33dfeb08a1ca6b2c0287b61 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 28 Nov 2016 17:23:40 -0500 Subject: drm/radeon: fix check for port PM availability The ATPX method does not always exist on the dGPU, it may be located at the iGPU. The parent device of the iGPU is the root port for which bridge_d3 is false. This accidentally enables the legacy PM method which conflicts with port PM and prevented the dGPU from powering on. Ported from amdgpu commit: drm/amdgpu: fix check for port PM availability from Peter Wu. Fixes: d3ac31f3b4bf9fad (drm/radeon: fix power state when port pm is unavailable (v2)) Signed-off-by: Alex Deucher Cc: Peter Wu Cc: # 4.8+ --- drivers/gpu/drm/radeon/radeon_atpx_handler.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index 4129b12521a6..0ae13cd2adda 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -479,7 +479,6 @@ static int radeon_atpx_power_state(enum vga_switcheroo_client_id id, */ static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev) { - struct pci_dev *parent_pdev = pci_upstream_bridge(pdev); acpi_handle dhandle, atpx_handle; acpi_status status; @@ -493,7 +492,6 @@ static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev) radeon_atpx_priv.dhandle = dhandle; radeon_atpx_priv.atpx.handle = atpx_handle; - radeon_atpx_priv.bridge_pm_usable = parent_pdev && parent_pdev->bridge_d3; return true; } @@ -555,11 +553,16 @@ static bool radeon_atpx_detect(void) struct pci_dev *pdev = NULL; bool has_atpx = false; int vga_count = 0; + bool d3_supported = false; + struct pci_dev *parent_pdev; while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { vga_count++; has_atpx |= (radeon_atpx_pci_probe_handle(pdev) == true); + + parent_pdev = pci_upstream_bridge(pdev); + d3_supported |= parent_pdev && parent_pdev->bridge_d3; } /* some newer PX laptops mark the dGPU as a non-VGA display device */ @@ -567,6 +570,9 @@ static bool radeon_atpx_detect(void) vga_count++; has_atpx |= (radeon_atpx_pci_probe_handle(pdev) == true); + + parent_pdev = pci_upstream_bridge(pdev); + d3_supported |= parent_pdev && parent_pdev->bridge_d3; } if (has_atpx && vga_count == 2) { @@ -574,6 +580,7 @@ static bool radeon_atpx_detect(void) printk(KERN_INFO "vga_switcheroo: detected switching method %s handle\n", acpi_method_name); radeon_atpx_priv.atpx_detected = true; + radeon_atpx_priv.bridge_pm_usable = d3_supported; radeon_atpx_init(); return true; } -- cgit v1.2.3 From e94bd1736f1f60e916a85a80c0b0ebeaae36cce5 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Wed, 30 Nov 2016 17:30:01 +0900 Subject: drm: Don't call drm_for_each_crtc with a non-KMS driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes oops if userspace calls DRM_IOCTL_GET_CAP for DRM_CAP_PAGE_FLIP_TARGET on a non-KMS device node. (Normal userspace doesn't do that, discovered by syzkaller) Reported-by: Dmitry Vyukov Fixes: f837297ad824 ("drm: Add DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags v2") Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161130083002.1520-1-michel@daenzer.net --- drivers/gpu/drm/drm_ioctl.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 0ad2c47f808f..71c3473476c7 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -254,10 +254,12 @@ static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_ req->value = dev->mode_config.async_page_flip; break; case DRM_CAP_PAGE_FLIP_TARGET: - req->value = 1; - drm_for_each_crtc(crtc, dev) { - if (!crtc->funcs->page_flip_target) - req->value = 0; + if (drm_core_check_feature(dev, DRIVER_MODESET)) { + req->value = 1; + drm_for_each_crtc(crtc, dev) { + if (!crtc->funcs->page_flip_target) + req->value = 0; + } } break; case DRM_CAP_CURSOR_WIDTH: -- cgit v1.2.3 From 2420489bcb8910188578acc0c11c75445c2e4b92 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Nov 2016 11:29:30 +0000 Subject: drm/i915: Don't touch NULL sg on i915_gem_object_get_pages_gtt() error On the DMA mapping error path, sg may be NULL (it has already been marked as the last scatterlist entry), and we should avoid dereferencing it again. Reported-by: Dan Carpenter Fixes: e227330223a7 ("drm/i915: avoid leaking DMA mappings") Signed-off-by: Chris Wilson Cc: Imre Deak Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/20161114112930.2033-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld (cherry picked from commit b17993b7b29612369270567643bcff814f4b3d7f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 91ab7e9d6d2e..00eb4814b913 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2268,7 +2268,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) page = shmem_read_mapping_page(mapping, i); if (IS_ERR(page)) { ret = PTR_ERR(page); - goto err_pages; + goto err_sg; } } #ifdef CONFIG_SWIOTLB @@ -2311,8 +2311,9 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) return 0; -err_pages: +err_sg: sg_mark_end(sg); +err_pages: for_each_sgt_page(page, sgt_iter, st) put_page(page); sg_free_table(st); -- cgit v1.2.3 From e411072d5740a49cdc9d0713798c30440757e451 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 28 Nov 2016 10:36:48 +0000 Subject: drm/i915: drop the struct_mutex when wedged or trying to reset We grab the struct_mutex in intel_crtc_page_flip, but if we are wedged or a reset is in progress we bail early but never seem to actually release the lock. Fixes: 7f1847ebf48b ("drm/i915: Simplify checking of GPU reset_counter in display pageflips") Cc: Chris Wilson Signed-off-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20161128103648.9235-1-matthew.auld@intel.com Reviewed-by: Joonas Lahtinen Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Cc: # v4.7+ (cherry picked from commit ddbb271aea87fc6004d3c8bcdb0710e980c7ec85) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 81c11499bcf0..3cb70d73239b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12260,7 +12260,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, intel_crtc->reset_count = i915_reset_count(&dev_priv->gpu_error); if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) { ret = -EIO; - goto cleanup; + goto unlock; } atomic_inc(&intel_crtc->unpin_work_count); @@ -12352,6 +12352,7 @@ cleanup_unpin: intel_unpin_fb_obj(fb, crtc->primary->state->rotation); cleanup_pending: atomic_dec(&intel_crtc->unpin_work_count); +unlock: mutex_unlock(&dev->struct_mutex); cleanup: crtc->primary->fb = old_fb; -- cgit v1.2.3 From 5102981212454998d549273ff9847f19e97a1794 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Tue, 29 Nov 2016 09:45:04 +0100 Subject: crypto: drbg - prevent invalid SG mappings When using SGs, only heap memory (memory that is valid as per virt_addr_valid) is allowed to be referenced. The CTR DRBG used to reference the caller-provided memory directly in an SG. In case the caller provided stack memory pointers, the SG mapping is not considered to be valid. In some cases, this would even cause a paging fault. The change adds a new scratch buffer that is used unconditionally to catch the cases where the caller-provided buffer is not suitable for use in an SG. The crypto operation of the CTR DRBG produces its output with that scratch buffer and finally copies the content of the scratch buffer to the caller's buffer. The scratch buffer is allocated during allocation time of the CTR DRBG as its access is protected with the DRBG mutex. Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/drbg.c | 29 ++++++++++++++++++++++++----- include/crypto/drbg.h | 2 ++ 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/crypto/drbg.c b/crypto/drbg.c index fb33f7d3b052..053035b5c8f8 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -262,6 +262,7 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, u8 *inbuf, u32 inbuflen, u8 *outbuf, u32 outlen); #define DRBG_CTR_NULL_LEN 128 +#define DRBG_OUTSCRATCHLEN DRBG_CTR_NULL_LEN /* BCC function for CTR DRBG as defined in 10.4.3 */ static int drbg_ctr_bcc(struct drbg_state *drbg, @@ -1644,6 +1645,9 @@ static int drbg_fini_sym_kernel(struct drbg_state *drbg) kfree(drbg->ctr_null_value_buf); drbg->ctr_null_value = NULL; + kfree(drbg->outscratchpadbuf); + drbg->outscratchpadbuf = NULL; + return 0; } @@ -1708,6 +1712,15 @@ static int drbg_init_sym_kernel(struct drbg_state *drbg) drbg->ctr_null_value = (u8 *)PTR_ALIGN(drbg->ctr_null_value_buf, alignmask + 1); + drbg->outscratchpadbuf = kmalloc(DRBG_OUTSCRATCHLEN + alignmask, + GFP_KERNEL); + if (!drbg->outscratchpadbuf) { + drbg_fini_sym_kernel(drbg); + return -ENOMEM; + } + drbg->outscratchpad = (u8 *)PTR_ALIGN(drbg->outscratchpadbuf, + alignmask + 1); + return alignmask; } @@ -1737,15 +1750,16 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, u8 *outbuf, u32 outlen) { struct scatterlist sg_in; + int ret; sg_init_one(&sg_in, inbuf, inlen); while (outlen) { - u32 cryptlen = min_t(u32, inlen, outlen); + u32 cryptlen = min3(inlen, outlen, (u32)DRBG_OUTSCRATCHLEN); struct scatterlist sg_out; - int ret; - sg_init_one(&sg_out, outbuf, cryptlen); + /* Output buffer may not be valid for SGL, use scratchpad */ + sg_init_one(&sg_out, drbg->outscratchpad, cryptlen); skcipher_request_set_crypt(drbg->ctr_req, &sg_in, &sg_out, cryptlen, drbg->V); ret = crypto_skcipher_encrypt(drbg->ctr_req); @@ -1761,14 +1775,19 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, break; } default: - return ret; + goto out; } init_completion(&drbg->ctr_completion); + memcpy(outbuf, drbg->outscratchpad, cryptlen); + outlen -= cryptlen; } + ret = 0; - return 0; +out: + memzero_explicit(drbg->outscratchpad, DRBG_OUTSCRATCHLEN); + return ret; } #endif /* CONFIG_CRYPTO_DRBG_CTR */ diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index 61580b19f9f6..22f884c97387 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -124,6 +124,8 @@ struct drbg_state { struct skcipher_request *ctr_req; /* CTR mode request handle */ __u8 *ctr_null_value_buf; /* CTR mode unaligned buffer */ __u8 *ctr_null_value; /* CTR mode aligned zero buf */ + __u8 *outscratchpadbuf; /* CTR mode output scratchpad */ + __u8 *outscratchpad; /* CTR mode aligned outbuf */ struct completion ctr_completion; /* CTR mode async handler */ int ctr_async_err; /* CTR mode async error */ -- cgit v1.2.3 From 66bf093772040ae8b864d2cf953f2c73005f7815 Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Tue, 29 Nov 2016 19:00:34 +0530 Subject: crypto: chcr - Fix memory corruption Fix memory corruption done by *((u32 *)dec_key + k) operation. Signed-off-by: Jitendra Lulla Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_algo.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h index ec64fbcdeb49..199b0bb69b89 100644 --- a/drivers/crypto/chelsio/chcr_algo.h +++ b/drivers/crypto/chelsio/chcr_algo.h @@ -422,7 +422,7 @@ static inline void get_aes_decrypt_key(unsigned char *dec_key, { u32 temp; u32 w_ring[MAX_NK]; - int i, j, k = 0; + int i, j, k; u8 nr, nk; switch (keylength) { @@ -460,6 +460,7 @@ static inline void get_aes_decrypt_key(unsigned char *dec_key, temp = w_ring[i % nk]; i++; } + i--; for (k = 0, j = i % nk; k < nk; k++) { *((u32 *)dec_key + k) = htonl(w_ring[j]); j--; -- cgit v1.2.3 From 57891633eeef60e732e045731cf20e50ee80acb4 Mon Sep 17 00:00:00 2001 From: David Michael Date: Tue, 29 Nov 2016 11:15:12 -0800 Subject: crypto: rsa - Add Makefile dependencies to fix parallel builds Both asn1 headers are included by rsa_helper.c, so rsa_helper.o should explicitly depend on them. Signed-off-by: David Michael Signed-off-by: Herbert Xu --- crypto/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/Makefile b/crypto/Makefile index 99cc64ac70ef..bd6a029094e6 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_CRYPTO_ECDH) += ecdh_generic.o $(obj)/rsapubkey-asn1.o: $(obj)/rsapubkey-asn1.c $(obj)/rsapubkey-asn1.h $(obj)/rsaprivkey-asn1.o: $(obj)/rsaprivkey-asn1.c $(obj)/rsaprivkey-asn1.h +$(obj)/rsa_helper.o: $(obj)/rsapubkey-asn1.h $(obj)/rsaprivkey-asn1.h clean-files += rsapubkey-asn1.c rsapubkey-asn1.h clean-files += rsaprivkey-asn1.c rsaprivkey-asn1.h -- cgit v1.2.3 From 409bf7f8a02ef88db5a0f2cdcf9489914f4b8508 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Thu, 1 Dec 2016 11:23:05 +1100 Subject: powerpc/eeh: Fix deadlock when PE frozen state can't be cleared In eeh_reset_device(), we take the pci_rescan_remove_lock immediately after after we call eeh_reset_pe() to reset the PCI controller. We then call eeh_clear_pe_frozen_state(), which can return an error. In this case, we bail out of eeh_reset_device() without calling pci_unlock_rescan_remove(). Add a call to pci_unlock_rescan_remove() in the eeh_clear_pe_frozen_state() error path so that we don't cause a deadlock later on. Reported-by: Pradipta Ghosh Fixes: 78954700631f ("powerpc/eeh: Avoid I/O access during PE reset") Cc: stable@vger.kernel.org # v3.16+ Signed-off-by: Andrew Donnellan Acked-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index a62be72da274..5c31369435f2 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -671,8 +671,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, /* Clear frozen state */ rc = eeh_clear_pe_frozen_state(pe, false); - if (rc) + if (rc) { + pci_unlock_rescan_remove(); return rc; + } /* Give the system 5 seconds to finish running the user-space * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, -- cgit v1.2.3 From dadc4a1bb9f0095343ed9dd4f1d9f3825d7b3e45 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2016 14:20:31 +1100 Subject: powerpc/64: Fix placement of .text to be immediately following .head.text Do not introduce any additional alignment. Placement of text section will be set by fixed section macros. Without this, output section alignment defaults to 4096, which makes BookE text section start at 0x1000 when it is expected to start at 0x100. This was introduced by commit 57f266497d81 ("powerpc: Use gas sections for arranging exception vectors") and was caught with the scripted head section checker (not yet merged). Fixes: 57f266497d81 ("powerpc: Use gas sections for arranging exception vectors") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/vmlinux.lds.S | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8295f51c1a5f..7394b770ae1f 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -94,8 +94,17 @@ SECTIONS * detected, and will result in a crash at boot due to offsets being * wrong. */ +#ifdef CONFIG_PPC64 + /* + * BLOCK(0) overrides the default output section alignment because + * this needs to start right after .head.text in order for fixed + * section placement to work. + */ + .text BLOCK(0) : AT(ADDR(.text) - LOAD_OFFSET) { +#else .text : AT(ADDR(.text) - LOAD_OFFSET) { ALIGN_FUNCTION(); +#endif /* careful! __ftr_alt_* sections need to be close to .text */ *(.text .fixup __ftr_alt_* .ref.text) SCHED_TEXT -- cgit v1.2.3 From c2d0f48a13e53b4747704c9e692f5e765e52041a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 30 Nov 2016 21:47:09 +0100 Subject: batman-adv: Check for alloc errors when preparing TT local data batadv_tt_prepare_tvlv_local_data can fail to allocate the memory for the new TVLV block. The caller is informed about this problem with the returned length of 0. Not checking this value results in an invalid memory access when either tt_data or tt_change is accessed. Reported-by: Dan Carpenter Fixes: 7ea7b4a14275 ("batman-adv: make the TT CRC logic VLAN specific") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 7f663092f6de..0dc85eb1cb7a 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -3282,7 +3282,7 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, &tvlv_tt_data, &tt_change, &tt_len); - if (!tt_len) + if (!tt_len || !tvlv_len) goto unlock; /* Copy the last orig_node's OGM buffer */ @@ -3300,7 +3300,7 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, &tvlv_tt_data, &tt_change, &tt_len); - if (!tt_len) + if (!tt_len || !tvlv_len) goto out; /* fill the rest of the tvlv with the real TT entries */ -- cgit v1.2.3 From 865563924022d8a307ee6dbc6a9ab4fb4d461cce Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 2 Dec 2016 15:11:50 -0500 Subject: kbuild: fix building bzImage with CONFIG_TRIM_UNUSED_KSYMS enabled When building a specific target such as bzImage, modules aren't normally built. However if CONFIG_TRIM_UNUSED_KSYMS is enabled, no built modules means none of the exported symbols are used and therefore they will all be trimmed away from the final kernel. A subsequent "make modules" will fail because modpost cannot find the needed symbols for those modules in the kernel binary. Let's make sure modules are also built whenever CONFIG_TRIM_UNUSED_KSYMS is enabled and that the kernel binary is properly rebuilt accordingly. Signed-off-by: Nicolas Pitre Tested-by: Jarod Wilson Signed-off-by: Linus Torvalds --- Makefile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9f9c3b577c75..b816089e5dc9 100644 --- a/Makefile +++ b/Makefile @@ -607,6 +607,13 @@ else include/config/auto.conf: ; endif # $(dot-config) +# For the kernel to actually contain only the needed exported symbols, +# we have to build modules as well to determine what those symbols are. +# (this can be evaluated only once include/config/auto.conf has been included) +ifdef CONFIG_TRIM_UNUSED_KSYMS + KBUILD_MODULES := 1 +endif + # The all: target is the default when no target is given on the # command line. # This allow a user to issue only 'make' to build a kernel including modules @@ -944,7 +951,7 @@ ifdef CONFIG_GDB_SCRIPTS endif ifdef CONFIG_TRIM_UNUSED_KSYMS $(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \ - "$(MAKE) KBUILD_MODULES=1 -f $(srctree)/Makefile vmlinux_prereq" + "$(MAKE) -f $(srctree)/Makefile vmlinux" endif # standalone target for easier testing -- cgit v1.2.3 From 20ab67a563f5299c09a234164c372aba5a59add8 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 2 Dec 2016 17:26:45 -0800 Subject: mm: workingset: fix NULL ptr in count_shadow_nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 0a6b76dd23fa ("mm: workingset: make shadow node shrinker memcg aware") has made the workingset shadow nodes shrinker memcg aware. The implementation is not correct though because memcg_kmem_enabled() might become true while we are doing a global reclaim when the sc->memcg might be NULL which is exactly what Marek has seen: BUG: unable to handle kernel NULL pointer dereference at 0000000000000400 IP: [] mem_cgroup_node_nr_lru_pages+0x20/0x40 PGD 0 Oops: 0000 [#1] SMP CPU: 0 PID: 60 Comm: kswapd0 Tainted: G O 4.8.10-12.pvops.qubes.x86_64 #1 task: ffff880011863b00 task.stack: ffff880011868000 RIP: mem_cgroup_node_nr_lru_pages+0x20/0x40 RSP: e02b:ffff88001186bc70 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff88001186bd20 RCX: 0000000000000002 RDX: 000000000000000c RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff88001186bc70 R08: 28f5c28f5c28f5c3 R09: 0000000000000000 R10: 0000000000006c34 R11: 0000000000000333 R12: 00000000000001f6 R13: ffffffff81c6f6a0 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff880013c00000(0000) knlGS:ffff880013d00000 CS: e033 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000400 CR3: 00000000122f2000 CR4: 0000000000042660 Call Trace: count_shadow_nodes+0x9a/0xa0 shrink_slab.part.42+0x119/0x3e0 shrink_node+0x22c/0x320 kswapd+0x32c/0x700 kthread+0xd8/0xf0 ret_from_fork+0x1f/0x40 Code: 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 3b 35 dd eb b1 00 55 48 89 e5 73 2c 89 d2 31 c9 31 c0 4c 63 ce 48 0f a3 ca 73 13 <4a> 8b b4 cf 00 04 00 00 41 89 c8 4a 03 84 c6 80 00 00 00 83 c1 RIP mem_cgroup_node_nr_lru_pages+0x20/0x40 RSP CR2: 0000000000000400 ---[ end trace 100494b9edbdfc4d ]--- This patch fixes the issue by checking sc->memcg rather than memcg_kmem_enabled() which is sufficient because shrink_slab makes sure that only memcg aware shrinkers will get non-NULL memcgs and only if memcg_kmem_enabled is true. Fixes: 0a6b76dd23fa ("mm: workingset: make shadow node shrinker memcg aware") Link: http://lkml.kernel.org/r/20161201132156.21450-1-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Marek Marczykowski-Górecki Tested-by: Marek Marczykowski-Górecki Acked-by: Vladimir Davydov Acked-by: Johannes Weiner Acked-by: Balbir Singh Cc: [4.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/workingset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/workingset.c b/mm/workingset.c index 617475f529f4..fb1f9183d89a 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -348,7 +348,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc); local_irq_enable(); - if (memcg_kmem_enabled()) { + if (sc->memcg) { pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid, LRU_ALL_FILE); } else { -- cgit v1.2.3 From bd041733c9f612b66c519e5a8b1a98b05b94ed24 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 2 Dec 2016 17:26:48 -0800 Subject: mm, vmscan: add cond_resched() into shrink_node_memcg() Boris Zhmurov has reported RCU stalls during the kswapd reclaim: INFO: rcu_sched detected stalls on CPUs/tasks: 23-...: (22 ticks this GP) idle=92f/140000000000000/0 softirq=2638404/2638404 fqs=23 (detected by 4, t=6389 jiffies, g=786259, c=786258, q=42115) Task dump for CPU 23: kswapd1 R running task 0 148 2 0x00000008 Call Trace: shrink_node+0xd2/0x2f0 kswapd+0x2cb/0x6a0 mem_cgroup_shrink_node+0x160/0x160 kthread+0xbd/0xe0 __switch_to+0x1fa/0x5c0 ret_from_fork+0x1f/0x40 kthread_create_on_node+0x180/0x180 a closer code inspection has shown that we might indeed miss all the scheduling points in the reclaim path if no pages can be isolated from the LRU list. This is a pathological case but other reports from Donald Buczek have shown that we might indeed hit such a path: clusterd-989 [009] .... 118023.654491: mm_vmscan_direct_reclaim_end: nr_reclaimed=193 kswapd1-86 [001] dN.. 118023.987475: mm_vmscan_lru_isolate: isolate_mode=0 classzone=0 order=0 nr_requested=32 nr_scanned=4239830 nr_taken=0 file=1 kswapd1-86 [001] dN.. 118024.320968: mm_vmscan_lru_isolate: isolate_mode=0 classzone=0 order=0 nr_requested=32 nr_scanned=4239844 nr_taken=0 file=1 kswapd1-86 [001] dN.. 118024.654375: mm_vmscan_lru_isolate: isolate_mode=0 classzone=0 order=0 nr_requested=32 nr_scanned=4239858 nr_taken=0 file=1 kswapd1-86 [001] dN.. 118024.987036: mm_vmscan_lru_isolate: isolate_mode=0 classzone=0 order=0 nr_requested=32 nr_scanned=4239872 nr_taken=0 file=1 kswapd1-86 [001] dN.. 118025.319651: mm_vmscan_lru_isolate: isolate_mode=0 classzone=0 order=0 nr_requested=32 nr_scanned=4239886 nr_taken=0 file=1 kswapd1-86 [001] dN.. 118025.652248: mm_vmscan_lru_isolate: isolate_mode=0 classzone=0 order=0 nr_requested=32 nr_scanned=4239900 nr_taken=0 file=1 kswapd1-86 [001] dN.. 118025.984870: mm_vmscan_lru_isolate: isolate_mode=0 classzone=0 order=0 nr_requested=32 nr_scanned=4239914 nr_taken=0 file=1 [...] kswapd1-86 [001] dN.. 118084.274403: mm_vmscan_lru_isolate: isolate_mode=0 classzone=0 order=0 nr_requested=32 nr_scanned=4241133 nr_taken=0 file=1 this is minute long snapshot which didn't take a single page from the LRU. It is not entirely clear why only 1303 pages have been scanned during that time (maybe there was a heavy IRQ activity interfering). In any case it looks like we can really hit long periods without scheduling on non preemptive kernels so an explicit cond_resched() in shrink_node_memcg which is independent on the reclaim operation is due. Link: http://lkml.kernel.org/r/20161202095841.16648-1-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Boris Zhmurov Tested-by: Boris Zhmurov Reported-by: Donald Buczek Reported-by: "Christopher S. Aker" Reported-by: Paul Menzel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/vmscan.c b/mm/vmscan.c index 76fda2268148..d75cdf360730 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2354,6 +2354,8 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc } } + cond_resched(); + if (nr_reclaimed < nr_to_reclaim || scan_adjusted) continue; -- cgit v1.2.3 From c66ebf2db555c6ed705044eabd2b37dcd546f68b Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sat, 3 Dec 2016 21:49:08 +0800 Subject: net: dcb: set error code on failures In function dcbnl_cee_fill(), returns the value of variable err on errors. However, on some error paths (e.g. nla put fails), its value may be 0. It may be better to explicitly set a negative errno to variable err before returning. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188881 Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- net/dcb/dcbnl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 4f6c1862dfd2..3202d75329b5 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1353,6 +1353,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) dcb_unlock: spin_unlock_bh(&dcb_lock); nla_put_failure: + err = -EMSGSIZE; return err; } -- cgit v1.2.3 From 3e5de27e940d00d8d504dfb96625fb654f641509 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Dec 2016 12:50:51 -0800 Subject: Linux 4.9-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b816089e5dc9..369099dc0fae 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* -- cgit v1.2.3 From 1a239173cccff726b60ac6a9c79ae4a1e26cfa49 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 1 Dec 2016 07:27:52 -0500 Subject: ipv4: Drop leaf from suffix pull/push functions It wasn't necessary to pass a leaf in when doing the suffix updates so just drop it. Instead just pass the suffix and work with that. Since we dropped the leaf there is no need to include that in the name so the names are updated to node_push_suffix and node_pull_suffix. Finally I noticed that the logic for pulling the suffix length back actually had some issues. Specifically it would stop prematurely if there was a longer suffix, but it was not as long as the original suffix. I updated the code to address that in node_pull_suffix. Fixes: 5405afd1a306 ("fib_trie: Add tracking value for suffix length") Suggested-by: Robert Shearman Signed-off-by: Alexander Duyck Reviewed-by: Robert Shearman Tested-by: Robert Shearman Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 026f309c51e9..eec90d72dd52 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -930,22 +930,24 @@ static struct key_vector *resize(struct trie *t, struct key_vector *tn) return tp; } -static void leaf_pull_suffix(struct key_vector *tp, struct key_vector *l) +static void node_pull_suffix(struct key_vector *tn, unsigned char slen) { - while ((tp->slen > tp->pos) && (tp->slen > l->slen)) { - if (update_suffix(tp) > l->slen) + unsigned char node_slen = tn->slen; + + while ((node_slen > tn->pos) && (node_slen > slen)) { + slen = update_suffix(tn); + if (node_slen == slen) break; - tp = node_parent(tp); + + tn = node_parent(tn); + node_slen = tn->slen; } } -static void leaf_push_suffix(struct key_vector *tn, struct key_vector *l) +static void node_push_suffix(struct key_vector *tn, unsigned char slen) { - /* if this is a new leaf then tn will be NULL and we can sort - * out parent suffix lengths as a part of trie_rebalance - */ - while (tn->slen < l->slen) { - tn->slen = l->slen; + while (tn->slen < slen) { + tn->slen = slen; tn = node_parent(tn); } } @@ -1107,7 +1109,7 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp, /* if we added to the tail node then we need to update slen */ if (l->slen < new->fa_slen) { l->slen = new->fa_slen; - leaf_push_suffix(tp, l); + node_push_suffix(tp, new->fa_slen); } return 0; @@ -1511,7 +1513,7 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp, /* update the trie with the latest suffix length */ l->slen = fa->fa_slen; - leaf_pull_suffix(tp, l); + node_pull_suffix(tp, fa->fa_slen); } /* Caller must hold RTNL. */ -- cgit v1.2.3 From a52ca62c4a6771028da9c1de934cdbcd93d54bb4 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 1 Dec 2016 07:27:57 -0500 Subject: ipv4: Drop suffix update from resize code It has been reported that update_suffix can be expensive when it is called on a large node in which most of the suffix lengths are the same. The time required to add 200K entries had increased from around 3 seconds to almost 49 seconds. In order to address this we need to move the code for updating the suffix out of resize and instead just have it handled in the cases where we are pushing a node that increases the suffix length, or will decrease the suffix length. Fixes: 5405afd1a306 ("fib_trie: Add tracking value for suffix length") Reported-by: Robert Shearman Signed-off-by: Alexander Duyck Reviewed-by: Robert Shearman Tested-by: Robert Shearman Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index eec90d72dd52..e3665bf7a7f3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -719,6 +719,13 @@ static unsigned char update_suffix(struct key_vector *tn) { unsigned char slen = tn->pos; unsigned long stride, i; + unsigned char slen_max; + + /* only vector 0 can have a suffix length greater than or equal to + * tn->pos + tn->bits, the second highest node will have a suffix + * length at most of tn->pos + tn->bits - 1 + */ + slen_max = min_t(unsigned char, tn->pos + tn->bits - 1, tn->slen); /* search though the list of children looking for nodes that might * have a suffix greater than the one we currently have. This is @@ -736,12 +743,8 @@ static unsigned char update_suffix(struct key_vector *tn) slen = n->slen; i &= ~(stride - 1); - /* if slen covers all but the last bit we can stop here - * there will be nothing longer than that since only node - * 0 and 1 << (bits - 1) could have that as their suffix - * length. - */ - if ((slen + 1) >= (tn->pos + tn->bits)) + /* stop searching if we have hit the maximum possible value */ + if (slen >= slen_max) break; } @@ -913,21 +916,7 @@ static struct key_vector *resize(struct trie *t, struct key_vector *tn) return collapse(t, tn); /* update parent in case halve failed */ - tp = node_parent(tn); - - /* Return if at least one deflate was run */ - if (max_work != MAX_WORK) - return tp; - - /* push the suffix length to the parent node */ - if (tn->slen > tn->pos) { - unsigned char slen = update_suffix(tn); - - if (slen > tp->slen) - tp->slen = slen; - } - - return tp; + return node_parent(tn); } static void node_pull_suffix(struct key_vector *tn, unsigned char slen) @@ -1068,6 +1057,7 @@ static int fib_insert_node(struct trie *t, struct key_vector *tp, } /* Case 3: n is NULL, and will just insert a new leaf */ + node_push_suffix(tp, new->fa_slen); NODE_INIT_PARENT(l, tp); put_child_root(tp, key, l); trie_rebalance(t, tp); @@ -1501,6 +1491,8 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp, * out parent suffix lengths as a part of trie_rebalance */ if (hlist_empty(&l->leaf)) { + if (tp->slen == l->slen) + node_pull_suffix(tp, tp->pos); put_child_root(tp, l->key, NULL); node_free(l); trie_rebalance(t, tp); @@ -1785,6 +1777,10 @@ void fib_table_flush_external(struct fib_table *tb) if (IS_TRIE(pn)) break; + /* update the suffix to address pulled leaves */ + if (pn->slen > pn->pos) + update_suffix(pn); + /* resize completed node */ pn = resize(t, pn); cindex = get_index(pkey, pn); @@ -1851,6 +1847,10 @@ int fib_table_flush(struct net *net, struct fib_table *tb) if (IS_TRIE(pn)) break; + /* update the suffix to address pulled leaves */ + if (pn->slen > pn->pos) + update_suffix(pn); + /* resize completed node */ pn = resize(t, pn); cindex = get_index(pkey, pn); -- cgit v1.2.3 From 89aa8445cd4e8c2556c40d42dd0ceb2cbb96ba78 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sat, 3 Dec 2016 17:56:17 +0800 Subject: netdev: broadcom: propagate error code Function bnxt_hwrm_stat_ctx_alloc() always returns 0, even if the call to _hwrm_send_message() fails. It may be better to propagate the errors to the caller of bnxt_hwrm_stat_ctx_alloc(). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188661 Signed-off-by: Pan Bian Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ee1a803aa11a..f08a20b921e7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4120,7 +4120,7 @@ static int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp) bp->grp_info[i].fw_stats_ctx = cpr->hw_stats_ctx_id; } mutex_unlock(&bp->hwrm_cmd_lock); - return 0; + return rc; } static int bnxt_hwrm_func_qcfg(struct bnxt *bp) -- cgit v1.2.3 From 14dd3e1b970feb125e4f453bc3b0569db5b2069b Mon Sep 17 00:00:00 2001 From: Suraj Deshmukh Date: Sat, 3 Dec 2016 07:59:26 +0000 Subject: net: af_mpls.c add space before open parenthesis Adding space after switch keyword before open parenthesis for readability purpose. This patch fixes the checkpatch.pl warning: space required before the open parenthesis '(' Signed-off-by: Suraj Deshmukh Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 0e4334cbde17..15fe97644ffe 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1252,7 +1252,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (!nla) continue; - switch(index) { + switch (index) { case RTA_OIF: cfg->rc_ifindex = nla_get_u32(nla); break; -- cgit v1.2.3 From b59589635ff01cc25270360709eeeb5c45c6abb9 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sat, 3 Dec 2016 19:33:23 +0800 Subject: net: bridge: set error code on failure Function br_sysfs_addbr() does not set error code when the call kobject_create_and_add() returns a NULL pointer. It may be better to return "-ENOMEM" when kobject_create_and_add() fails. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188781 Signed-off-by: Pan Bian Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_sysfs_br.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index e120307c6e36..f88c4df3f91e 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -898,6 +898,7 @@ int br_sysfs_addbr(struct net_device *dev) if (!br->ifobj) { pr_info("%s: can't add kobject (directory) %s/%s\n", __func__, dev->name, SYSFS_BRIDGE_PORT_SUBDIR); + err = -ENOMEM; goto out3; } return 0; -- cgit v1.2.3 From 51920830d9d0eb617af18dc60443fcd4fb50a533 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sat, 3 Dec 2016 19:24:48 +0800 Subject: net: usb: set error code when usb_alloc_urb fails In function lan78xx_probe(), variable ret takes the errno code on failures. However, when the call to usb_alloc_urb() fails, its value will keeps 0. 0 indicates success in the context, which is inconsistent with the execution result. This patch fixes the bug, assigning "-ENOMEM" to ret when usb_alloc_urb() returns a NULL pointer. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188771 Signed-off-by: Pan Bian Acked-by: Woojung Huh Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index db558b8b32fe..f33460cec79f 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3395,6 +3395,7 @@ static int lan78xx_probe(struct usb_interface *intf, if (buf) { dev->urb_intr = usb_alloc_urb(0, GFP_KERNEL); if (!dev->urb_intr) { + ret = -ENOMEM; kfree(buf); goto out3; } else { -- cgit v1.2.3 From 4606c9e8c541f97034e53e644129376a6170b8c7 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sat, 3 Dec 2016 20:25:45 +0800 Subject: atm: lanai: set error code when ioremap fails In function lanai_dev_open(), when the call to ioremap() fails, the value of return variable result is 0. 0 means no error in this context. This patch fixes the bug, assigning "-ENOMEM" to result when ioremap() returns a NULL pointer. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188791 Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- drivers/atm/lanai.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index ce43ae3e87b3..445505d9ea07 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -2143,6 +2143,7 @@ static int lanai_dev_open(struct atm_dev *atmdev) lanai->base = (bus_addr_t) ioremap(raw_base, LANAI_MAPPING_SIZE); if (lanai->base == NULL) { printk(KERN_ERR DEV_LABEL ": couldn't remap I/O space\n"); + result = -ENOMEM; goto error_pci; } /* 3.3: Reset lanai and PHY */ -- cgit v1.2.3 From 0eab121ef8750a5c8637d51534d5e9143fb0633f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 5 Dec 2016 10:34:38 -0800 Subject: net: ping: check minimum size on ICMP header length Prior to commit c0371da6047a ("put iov_iter into msghdr") in v3.19, there was no check that the iovec contained enough bytes for an ICMP header, and the read loop would walk across neighboring stack contents. Since the iov_iter conversion, bad arguments are noticed, but the returned error is EFAULT. Returning EINVAL is a clearer error and also solves the problem prior to v3.19. This was found using trinity with KASAN on v3.18: BUG: KASAN: stack-out-of-bounds in memcpy_fromiovec+0x60/0x114 at addr ffffffc071077da0 Read of size 8 by task trinity-c2/9623 page:ffffffbe034b9a08 count:0 mapcount:0 mapping: (null) index:0x0 flags: 0x0() page dumped because: kasan: bad access detected CPU: 0 PID: 9623 Comm: trinity-c2 Tainted: G BU 3.18.0-dirty #15 Hardware name: Google Tegra210 Smaug Rev 1,3+ (DT) Call trace: [] dump_backtrace+0x0/0x1ac arch/arm64/kernel/traps.c:90 [] show_stack+0x10/0x1c arch/arm64/kernel/traps.c:171 [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x7c/0xd0 lib/dump_stack.c:50 [< inline >] print_address_description mm/kasan/report.c:147 [< inline >] kasan_report_error mm/kasan/report.c:236 [] kasan_report+0x380/0x4b8 mm/kasan/report.c:259 [< inline >] check_memory_region mm/kasan/kasan.c:264 [] __asan_load8+0x20/0x70 mm/kasan/kasan.c:507 [] memcpy_fromiovec+0x5c/0x114 lib/iovec.c:15 [< inline >] memcpy_from_msg include/linux/skbuff.h:2667 [] ping_common_sendmsg+0x50/0x108 net/ipv4/ping.c:674 [] ping_v4_sendmsg+0xd8/0x698 net/ipv4/ping.c:714 [] inet_sendmsg+0xe0/0x12c net/ipv4/af_inet.c:749 [< inline >] __sock_sendmsg_nosec net/socket.c:624 [< inline >] __sock_sendmsg net/socket.c:632 [] sock_sendmsg+0x124/0x164 net/socket.c:643 [< inline >] SYSC_sendto net/socket.c:1797 [] SyS_sendto+0x178/0x1d8 net/socket.c:1761 CVE-2016-8399 Reported-by: Qidan He Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- net/ipv4/ping.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 205e2000d395..96b8e2b95731 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -657,6 +657,10 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, if (len > 0xFFFF) return -EMSGSIZE; + /* Must have at least a full ICMP header. */ + if (len < icmph_len) + return -EINVAL; + /* * Check the flags. */ -- cgit v1.2.3 From c79e167c3cba066892542f3dfb5e73b7207e01df Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 4 Dec 2016 12:15:44 +0800 Subject: net: caif: remove ineffective check The check of the return value of sock_register() is ineffective. "if(!err)" seems to be a typo. It is better to propagate the error code to the callers of caif_sktinit_module(). This patch removes the check statment and directly returns the result of sock_register(). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188751 Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- net/caif/caif_socket.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index aa209b1066c9..92cbbd2afddb 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1107,10 +1107,7 @@ static struct net_proto_family caif_family_ops = { static int __init caif_sktinit_module(void) { - int err = sock_register(&caif_family_ops); - if (!err) - return err; - return 0; + return sock_register(&caif_family_ops); } static void __exit caif_sktexit_module(void) -- cgit v1.2.3 From 8ad3ba934587c8ecbfee13331d859a7849afdfbb Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 4 Dec 2016 13:27:40 +0800 Subject: net: irda: set error code on failures When the calls to kzalloc() fail, the value of return variable ret may be 0. 0 means success in this context. This patch fixes the bug, assigning "-ENOMEM" to ret before calling kzalloc(). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188971 Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- drivers/net/irda/irda-usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c index a198946bc54f..8716b8c07feb 100644 --- a/drivers/net/irda/irda-usb.c +++ b/drivers/net/irda/irda-usb.c @@ -1723,6 +1723,7 @@ static int irda_usb_probe(struct usb_interface *intf, /* Don't change this buffer size and allocation without doing * some heavy and complete testing. Don't ask why :-( * Jean II */ + ret = -ENOMEM; self->speed_buff = kzalloc(IRDA_USB_SPEED_MTU, GFP_KERNEL); if (!self->speed_buff) goto err_out_3; -- cgit v1.2.3 From 7cf6156633b71743c09a8e56b1f0dedfc4ce6e66 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 4 Dec 2016 13:45:15 +0800 Subject: atm: fix improper return value It returns variable "error" when ioremap_nocache() returns a NULL pointer. The value of "error" is 0 then, which will mislead the callers to believe that there is no error. This patch fixes the bug, returning "-ENOMEM". Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=189021 Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- drivers/atm/eni.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index f2aaf9e32a36..40c2d561417b 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -1727,7 +1727,7 @@ static int eni_do_init(struct atm_dev *dev) printk("\n"); printk(KERN_ERR DEV_LABEL "(itf %d): can't set up page " "mapping\n",dev->number); - return error; + return -ENOMEM; } eni_dev->ioaddr = base; eni_dev->base_diff = real_base - (unsigned long) base; -- cgit v1.2.3 From 0ff18d2d36efad65572990fa7febeb3ebe19da89 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 4 Dec 2016 13:53:53 +0800 Subject: net: ethernet: qlogic: set error code on failure When calling dma_mapping_error(), the value of return variable rc is 0. And when the call returns an unexpected value, rc is not set to a negative errno. Thus, it will return 0 on the error path, and its callers cannot detect the bug. This patch fixes the bug, assigning "-ENOMEM" to err. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=189041 Signed-off-by: Pan Bian Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index f95385cbbd40..62ae55bd81b8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -1730,6 +1730,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb) mapping))) { DP_NOTICE(cdev, "Unable to map frag - dropping packet\n"); + rc = -ENOMEM; goto err; } } else { -- cgit v1.2.3 From 005f7e68e74df94c2a676b5a3e98c6fb65aae606 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 4 Dec 2016 18:46:03 +0800 Subject: net: bnx2x: fix improper return value Macro BNX2X_ALLOC_AND_SET(arr, lbl, func) calls kmalloc() to allocate memory, and jumps to label "lbl" if the allocation fails. Label "lbl" first cleans memory and then returns variable rc. Before calling the macro, the value of variable rc is 0. Because 0 means no error, the callers of bnx2x_init_firmware() may be misled. This patch fixes the bug, assigning "-ENOMEM" to rc before calling macro NX2X_ALLOC_AND_SET(). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=189141 Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 0cee4c0283f9..6f9fc20f0512 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13505,6 +13505,7 @@ static int bnx2x_init_firmware(struct bnx2x *bp) /* Initialize the pointers to the init arrays */ /* Blob */ + rc = -ENOMEM; BNX2X_ALLOC_AND_SET(init_data, request_firmware_exit, be32_to_cpu_n); /* Opcodes */ -- cgit v1.2.3 From 9a53682b340b97642793271ba095cc9531a7b649 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 4 Dec 2016 18:43:31 +0800 Subject: isdn: hisax: set error code on failure In function hfc4s8s_probe(), the value of return variable err should be negative on failures. However, when the call to request_region() returns NULL, the value of err is 0. This patch fixes the bug, assigning "-EBUSY" to err on the path that request_region() fails. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188931 Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- drivers/isdn/hisax/hfc4s8s_l1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/isdn/hisax/hfc4s8s_l1.c b/drivers/isdn/hisax/hfc4s8s_l1.c index 9600cd771f1a..e034ed847ff3 100644 --- a/drivers/isdn/hisax/hfc4s8s_l1.c +++ b/drivers/isdn/hisax/hfc4s8s_l1.c @@ -1499,6 +1499,7 @@ hfc4s8s_probe(struct pci_dev *pdev, const struct pci_device_id *ent) printk(KERN_INFO "HFC-4S/8S: failed to request address space at 0x%04x\n", hw->iobase); + err = -EBUSY; goto out; } -- cgit v1.2.3 From 65870fa77fd7f83d7be4ed924d47ed9e3831f434 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sun, 4 Dec 2016 15:30:17 +0200 Subject: bnx2x: Correct ringparam estimate when DOWN Until interface is up [and assuming ringparams weren't explicitly configured] when queried for the size of its rings bnx2x would claim they're the maximal size by default. That is incorrect as by default the maximal number of buffers would be equally divided between the various rx rings. This prevents the user from actually setting the number of elements on each rx ring to be of maximal size prior to transitioning the interface into up state. To fix this, make a rough estimation about the number of buffers. It wouldn't always be accurate, but it would be much better than current estimation and would allow users to increase number of buffers during early initialization of the interface. Reported-by: Seymour, Shane Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 85a7800bfc12..5f19427c7b27 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -1872,8 +1872,16 @@ static void bnx2x_get_ringparam(struct net_device *dev, ering->rx_max_pending = MAX_RX_AVAIL; + /* If size isn't already set, we give an estimation of the number + * of buffers we'll have. We're neglecting some possible conditions + * [we couldn't know for certain at this point if number of queues + * might shrink] but the number would be correct for the likely + * scenario. + */ if (bp->rx_ring_size) ering->rx_pending = bp->rx_ring_size; + else if (BNX2X_NUM_RX_QUEUES(bp)) + ering->rx_pending = MAX_RX_AVAIL / BNX2X_NUM_RX_QUEUES(bp); else ering->rx_pending = MAX_RX_AVAIL; -- cgit v1.2.3 From 360d9df2acd9f0b89aabaf16fca08954f113bd4e Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sun, 4 Dec 2016 15:30:18 +0200 Subject: bnx2x: Prevent tunnel config for 577xx Only the 578xx adapters are capable of configuring UDP ports for the purpose of tunnelling - doing the same on 577xx might lead to a firmware assertion. We're already not claiming support for any related feature for such devices, but we also need to prevent the configuration of the UDP ports to the device in this case. Fixes: f34fa14cc033 ("bnx2x: Add vxlan RSS support") Reported-by: Anikina Anna Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 6f9fc20f0512..4febe60eadc2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -10138,7 +10138,7 @@ static void __bnx2x_add_udp_port(struct bnx2x *bp, u16 port, { struct bnx2x_udp_tunnel *udp_port = &bp->udp_tunnel_ports[type]; - if (!netif_running(bp->dev) || !IS_PF(bp)) + if (!netif_running(bp->dev) || !IS_PF(bp) || CHIP_IS_E1x(bp)) return; if (udp_port->count && udp_port->dst_port == port) { @@ -10163,7 +10163,7 @@ static void __bnx2x_del_udp_port(struct bnx2x *bp, u16 port, { struct bnx2x_udp_tunnel *udp_port = &bp->udp_tunnel_ports[type]; - if (!IS_PF(bp)) + if (!IS_PF(bp) || CHIP_IS_E1x(bp)) return; if (!udp_port->count || udp_port->dst_port != port) { -- cgit v1.2.3 From c823abac17926767fb50175e098f087a6ac684c3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 4 Dec 2016 19:22:05 -0800 Subject: net: ep93xx_eth: Do not crash unloading module When we unload the ep93xx_eth, whether we have opened the network interface or not, we will either hit a kernel paging request error, or a simple NULL pointer de-reference because: - if ep93xx_open has been called, we have created a valid DMA mapping for ep->descs, when we call ep93xx_stop, we also call ep93xx_free_buffers, ep->descs now has a stale value - if ep93xx_open has not been called, we have a NULL pointer for ep->descs, so performing any operation against that address just won't work Fix this by adding a NULL pointer check for ep->descs which means that ep93xx_free_buffers() was able to successfully tear down the descriptors and free the DMA cookie as well. Fixes: 1d22e05df818 ("[PATCH] Cirrus Logic ep93xx ethernet driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/cirrus/ep93xx_eth.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c index de9f7c97d916..9a161e981529 100644 --- a/drivers/net/ethernet/cirrus/ep93xx_eth.c +++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c @@ -468,6 +468,9 @@ static void ep93xx_free_buffers(struct ep93xx_priv *ep) struct device *dev = ep->dev->dev.parent; int i; + if (!ep->descs) + return; + for (i = 0; i < RX_QUEUE_ENTRIES; i++) { dma_addr_t d; @@ -490,6 +493,7 @@ static void ep93xx_free_buffers(struct ep93xx_priv *ep) dma_free_coherent(dev, sizeof(struct ep93xx_descs), ep->descs, ep->descs_dma_addr); + ep->descs = NULL; } static int ep93xx_alloc_buffers(struct ep93xx_priv *ep) -- cgit v1.2.3 From ad558858295726cb876b78d1c39d471372f1901a Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 2 Dec 2016 14:53:59 -0800 Subject: uapi: export tc_skbmod.h Fixes commit 735cffe5d800 ("net_sched: Introduce skbmod action") Not used by iproute2 but maybe in future. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild index 9611c7b6c18f..e3db7403296f 100644 --- a/include/uapi/linux/tc_act/Kbuild +++ b/include/uapi/linux/tc_act/Kbuild @@ -12,3 +12,4 @@ header-y += tc_bpf.h header-y += tc_connmark.h header-y += tc_ife.h header-y += tc_tunnel_key.h +header-y += tc_skbmod.h -- cgit v1.2.3 From ffe3bb85c19e1dbf96cc13aad823ae0a8855d066 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 2 Dec 2016 14:54:00 -0800 Subject: uapi: export nf_log.h File is in uapi directory but not being copied on make install_headers Fixes commit 4ec9c8fbbc22 ("netfilter: nft_log: complete NFTA_LOG_FLAGS attr support"). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/netfilter/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index cd26d7a0fd07..03f194aeadc5 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -5,6 +5,7 @@ header-y += nf_conntrack_ftp.h header-y += nf_conntrack_sctp.h header-y += nf_conntrack_tcp.h header-y += nf_conntrack_tuple_common.h +header-y += nf_log.h header-y += nf_tables.h header-y += nf_tables_compat.h header-y += nf_nat.h -- cgit v1.2.3 From ed5d7788a934a4b6d6d025e948ed4da496b4f12e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 5 Dec 2016 15:28:21 +0800 Subject: netlink: Do not schedule work from sk_destruct It is wrong to schedule a work from sk_destruct using the socket as the memory reserve because the socket will be freed immediately after the return from sk_destruct. Instead we should do the deferral prior to sk_free. This patch does just that. Fixes: 707693c8a498 ("netlink: Call cb->done from a worker thread") Signed-off-by: Herbert Xu Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 602e5ebe9db3..246f29d365c0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -322,11 +322,13 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) sk_mem_charge(sk, skb->truesize); } -static void __netlink_sock_destruct(struct sock *sk) +static void netlink_sock_destruct(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); if (nlk->cb_running) { + if (nlk->cb.done) + nlk->cb.done(&nlk->cb); module_put(nlk->cb.module); kfree_skb(nlk->cb.skb); } @@ -348,21 +350,7 @@ static void netlink_sock_destruct_work(struct work_struct *work) struct netlink_sock *nlk = container_of(work, struct netlink_sock, work); - nlk->cb.done(&nlk->cb); - __netlink_sock_destruct(&nlk->sk); -} - -static void netlink_sock_destruct(struct sock *sk) -{ - struct netlink_sock *nlk = nlk_sk(sk); - - if (nlk->cb_running && nlk->cb.done) { - INIT_WORK(&nlk->work, netlink_sock_destruct_work); - schedule_work(&nlk->work); - return; - } - - __netlink_sock_destruct(sk); + sk_free(&nlk->sk); } /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on @@ -667,8 +655,18 @@ out_module: static void deferred_put_nlk_sk(struct rcu_head *head) { struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); + struct sock *sk = &nlk->sk; + + if (!atomic_dec_and_test(&sk->sk_refcnt)) + return; + + if (nlk->cb_running && nlk->cb.done) { + INIT_WORK(&nlk->work, netlink_sock_destruct_work); + schedule_work(&nlk->work); + return; + } - sock_put(&nlk->sk); + sk_free(sk); } static int netlink_release(struct socket *sock) -- cgit v1.2.3 From 6b3374cb1c0bd4699ace03d7e0dc14b532e4f52e Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 5 Dec 2016 18:12:54 +0100 Subject: net: stmmac: clear reset value of snps, wr_osr_lmt/snps, rd_osr_lmt before writing WR_OSR_LMT and RD_OSR_LMT have a reset value of 1. Since the reset value wasn't cleared before writing, the value in the register would be incorrect if specifying an uneven value for snps,wr_osr_lmt/snps,rd_osr_lmt. Zero is a valid value for the properties, since the databook specifies: maximum outstanding requests = WR_OSR_LMT + 1. We do not want to change the behavior for existing users when the property is missing. Therefore, default to 1 if the property is missing, since that is the same as the reset value. Signed-off-by: Niklas Cassel Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c | 2 ++ drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c | 2 ++ drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 6 ++++-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c index 990746955216..f35385266fbf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c @@ -43,9 +43,11 @@ static void dwmac1000_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi) if (axi->axi_xit_frm) value |= DMA_AXI_LPI_XIT_FRM; + value &= ~DMA_AXI_WR_OSR_LMT; value |= (axi->axi_wr_osr_lmt & DMA_AXI_WR_OSR_LMT_MASK) << DMA_AXI_WR_OSR_LMT_SHIFT; + value &= ~DMA_AXI_RD_OSR_LMT; value |= (axi->axi_rd_osr_lmt & DMA_AXI_RD_OSR_LMT_MASK) << DMA_AXI_RD_OSR_LMT_SHIFT; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 116151cd6a95..32bc2fc73cdc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -30,9 +30,11 @@ static void dwmac4_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi) if (axi->axi_xit_frm) value |= DMA_AXI_LPI_XIT_FRM; + value &= ~DMA_AXI_WR_OSR_LMT; value |= (axi->axi_wr_osr_lmt & DMA_AXI_OSR_MAX) << DMA_AXI_WR_OSR_LMT_SHIFT; + value &= ~DMA_AXI_RD_OSR_LMT; value |= (axi->axi_rd_osr_lmt & DMA_AXI_OSR_MAX) << DMA_AXI_RD_OSR_LMT_SHIFT; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index a840818bf4df..ac3d39c69509 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -126,8 +126,10 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev) axi->axi_mb = of_property_read_bool(np, "snps,axi_mb"); axi->axi_rb = of_property_read_bool(np, "snps,axi_rb"); - of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt); - of_property_read_u32(np, "snps,rd_osr_lmt", &axi->axi_rd_osr_lmt); + if (of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt)) + axi->axi_wr_osr_lmt = 1; + if (of_property_read_u32(np, "snps,rd_osr_lmt", &axi->axi_rd_osr_lmt)) + axi->axi_rd_osr_lmt = 1; of_property_read_u32_array(np, "snps,blen", axi->axi_blen, AXI_BLEN); of_node_put(np); -- cgit v1.2.3 From efc45154828ae4e49c6b46f59882bfef32697d44 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sat, 3 Dec 2016 17:31:45 +0100 Subject: uapi glibc compat: fix outer guard of net device flags enum Fix a wrong condition preventing the higher net device flags IFF_LOWER_UP etc to be defined if net/if.h is included before linux/if.h. The comment makes it clear the intention was to allow partial definition with either parts. This fixes compilation of userspace programs trying to use IFF_LOWER_UP, IFF_DORMANT or IFF_ECHO. Fixes: 4a91cb61bb99 ("uapi glibc compat: fix compile errors when glibc net/if.h included before linux/if.h") Signed-off-by: Jonas Gorski Reviewed-by: Mikko Rapeli Signed-off-by: David S. Miller --- include/uapi/linux/if.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h index e601c8c3bdc7..1158a043342a 100644 --- a/include/uapi/linux/if.h +++ b/include/uapi/linux/if.h @@ -31,7 +31,7 @@ #include /* For glibc compatibility. An empty enum does not compile. */ -#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && \ +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 || \ __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 /** * enum net_device_flags - &struct net_device flags @@ -99,7 +99,7 @@ enum net_device_flags { IFF_ECHO = 1<<18, /* volatile */ #endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */ }; -#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 */ +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 || __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 */ /* for compatibility with glibc net/if.h */ #if __UAPI_DEF_IF_NET_DEVICE_FLAGS -- cgit v1.2.3 From dcb17d22e1c2cd72e72190c736349a675362b3bc Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 5 Dec 2016 18:37:13 -0200 Subject: tcp: warn on bogus MSS and try to amend it There have been some reports lately about TCP connection stalls caused by NIC drivers that aren't setting gso_size on aggregated packets on rx path. This causes TCP to assume that the MSS is actually the size of the aggregated packet, which is invalid. Although the proper fix is to be done at each driver, it's often hard and cumbersome for one to debug, come to such root cause and report/fix it. This patch amends this situation in two ways. First, it adds a warning on when this situation occurs, so it gives a hint to those trying to debug this. It also limit the maximum probed MSS to the adverised MSS, as it should never be any higher than that. The result is that the connection may not have the best performance ever but it shouldn't stall, and the admin will have a hint on what to look for. Tested with virtio by forcing gso_size to 0. v2: updated msg per David's suggestion v3: use skb_iif to find the interface and also log its name, per Eric Dumazet's suggestion. As the skb may be backlogged and the interface gone by then, we need to check if the number still has a meaning. v4: use helper tcp_gro_dev_warn() and avoid pr_warn_once inside __once, per David's suggestion Cc: Jonathan Maxwell Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a27b9c0e27c0..c71d49ce0c93 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -128,6 +128,23 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define REXMIT_LOST 1 /* retransmit packets marked lost */ #define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */ +static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb) +{ + static bool __once __read_mostly; + + if (!__once) { + struct net_device *dev; + + __once = true; + + rcu_read_lock(); + dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); + pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", + dev ? dev->name : "Unknown driver"); + rcu_read_unlock(); + } +} + /* Adapt the MSS value used to make delayed ack decision to the * real world. */ @@ -144,7 +161,10 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) */ len = skb_shinfo(skb)->gso_size ? : skb->len; if (len >= icsk->icsk_ack.rcv_mss) { - icsk->icsk_ack.rcv_mss = len; + icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, + tcp_sk(sk)->advmss); + if (unlikely(icsk->icsk_ack.rcv_mss != len)) + tcp_gro_dev_warn(sk, skb); } else { /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. -- cgit v1.2.3 From e37e2ff350a321ad9c36b588e76f34fbba305be6 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 5 Dec 2016 18:10:58 -0800 Subject: virtio-net: Fix DMA-from-the-stack in virtnet_set_mac_address() With CONFIG_VMAP_STACK=y, virtnet_set_mac_address() can be passed a pointer to the stack and it will OOPS. Copy the address to the heap to prevent the crash. Cc: Michael S. Tsirkin Cc: Jason Wang Cc: Laura Abbott Reported-by: zbyszek@in.waw.pl Signed-off-by: Andy Lutomirski Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7276d5a95bd0..cbf1c613c67a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -969,12 +969,17 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p) struct virtnet_info *vi = netdev_priv(dev); struct virtio_device *vdev = vi->vdev; int ret; - struct sockaddr *addr = p; + struct sockaddr *addr; struct scatterlist sg; - ret = eth_prepare_mac_addr_change(dev, p); + addr = kmalloc(sizeof(*addr), GFP_KERNEL); + if (!addr) + return -ENOMEM; + memcpy(addr, p, sizeof(*addr)); + + ret = eth_prepare_mac_addr_change(dev, addr); if (ret) - return ret; + goto out; if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { sg_init_one(&sg, addr->sa_data, dev->addr_len); @@ -982,7 +987,8 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p) VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { dev_warn(&vdev->dev, "Failed to set mac address by vq command.\n"); - return -EINVAL; + ret = -EINVAL; + goto out; } } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { @@ -996,8 +1002,11 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p) } eth_commit_mac_addr_change(dev, p); + ret = 0; - return 0; +out: + kfree(addr); + return ret; } static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev, -- cgit v1.2.3 From d14584d91976c42c7178164665c4959495740939 Mon Sep 17 00:00:00 2001 From: Venkat Duvvuru Date: Tue, 6 Dec 2016 00:33:50 -0500 Subject: be2net: Add DEVSEC privilege to SET_HSW_CONFIG command. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OPCODE_COMMON_GET_FN_PRIVILEGES is returning only DEVSEC privilege (Unrestricted Administrative Privilege) for Lancer NIC functions. So, driver is failing SET_HSW_CONFIG command, as DEVSEC privilege was not set in the privilege bitmap. This patch fixes the problem by setting DEVSEC privilege in SET_HSW_CONFIG’s privilege bitmap. Signed-off-by: Venkat Duvvuru Signed-off-by: Suresh Reddy Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 1fb5d7239254..0e74529a4209 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -90,7 +90,8 @@ static struct be_cmd_priv_map cmd_priv_map[] = { { OPCODE_COMMON_SET_HSW_CONFIG, CMD_SUBSYSTEM_COMMON, - BE_PRIV_DEVCFG | BE_PRIV_VHADM + BE_PRIV_DEVCFG | BE_PRIV_VHADM | + BE_PRIV_DEVSEC }, { OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES, -- cgit v1.2.3 From f85de6666347c974cdf97b1026180995d912d7d0 Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Tue, 6 Dec 2016 09:26:53 +0300 Subject: net: fec: fix compile with CONFIG_M5272 Commit 80cca775cdc4 ("net: fec: cache statistics while device is down") introduced unconditional statistics-related actions. However, when driver is compiled with CONFIG_M5272, staticsics-related definitions do not exist, which results into build errors. Fix that by adding explicit handling of !defined(CONFIG_M5272) case. Fixes: 80cca775cdc4 ("net: fec: cache statistics while device is down") Signed-off-by: Nikita Yushchenko Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 5f77caa59534..12aef1b15356 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2313,6 +2313,8 @@ static const struct fec_stat { { "IEEE_rx_octets_ok", IEEE_R_OCTETS_OK }, }; +#define FEC_STATS_SIZE (ARRAY_SIZE(fec_stats) * sizeof(u64)) + static void fec_enet_update_ethtool_stats(struct net_device *dev) { struct fec_enet_private *fep = netdev_priv(dev); @@ -2330,7 +2332,7 @@ static void fec_enet_get_ethtool_stats(struct net_device *dev, if (netif_running(dev)) fec_enet_update_ethtool_stats(dev); - memcpy(data, fep->ethtool_stats, ARRAY_SIZE(fec_stats) * sizeof(u64)); + memcpy(data, fep->ethtool_stats, FEC_STATS_SIZE); } static void fec_enet_get_strings(struct net_device *netdev, @@ -2355,6 +2357,12 @@ static int fec_enet_get_sset_count(struct net_device *dev, int sset) return -EOPNOTSUPP; } } + +#else /* !defined(CONFIG_M5272) */ +#define FEC_STATS_SIZE 0 +static inline void fec_enet_update_ethtool_stats(struct net_device *dev) +{ +} #endif /* !defined(CONFIG_M5272) */ static int fec_enet_nway_reset(struct net_device *dev) @@ -3293,8 +3301,7 @@ fec_probe(struct platform_device *pdev) /* Init network device */ ndev = alloc_etherdev_mqs(sizeof(struct fec_enet_private) + - ARRAY_SIZE(fec_stats) * sizeof(u64), - num_tx_qs, num_rx_qs); + FEC_STATS_SIZE, num_tx_qs, num_rx_qs); if (!ndev) return -ENOMEM; -- cgit v1.2.3 From f663ad98623926b8d7bdef4b4648d10c0229aebe Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 6 Dec 2016 17:32:43 +0200 Subject: net/mlx5: Verify module parameters Verify the mlx5_core module parameters by making sure that they are in the expected range and if they aren't restore them to their default values. Fixes: 9603b61de1ee ('mlx5: Move pci device handling from mlx5_ib to mlx5_core') Signed-off-by: Kamal Heib Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 27 +++++++++++++--------- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 3b7c6a9f2b5f..22eb3be06651 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -62,13 +62,13 @@ MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRIVER_VERSION); -int mlx5_core_debug_mask; -module_param_named(debug_mask, mlx5_core_debug_mask, int, 0644); +unsigned int mlx5_core_debug_mask; +module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644); MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0"); #define MLX5_DEFAULT_PROF 2 -static int prof_sel = MLX5_DEFAULT_PROF; -module_param_named(prof_sel, prof_sel, int, 0444); +static unsigned int prof_sel = MLX5_DEFAULT_PROF; +module_param_named(prof_sel, prof_sel, uint, 0444); MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); enum { @@ -1227,13 +1227,6 @@ static int init_one(struct pci_dev *pdev, dev->pdev = pdev; dev->event = mlx5_core_event; - - if (prof_sel < 0 || prof_sel >= ARRAY_SIZE(profile)) { - mlx5_core_warn(dev, - "selected profile out of range, selecting default (%d)\n", - MLX5_DEFAULT_PROF); - prof_sel = MLX5_DEFAULT_PROF; - } dev->profile = &profile[prof_sel]; INIT_LIST_HEAD(&priv->ctx_list); @@ -1450,10 +1443,22 @@ static struct pci_driver mlx5_core_driver = { .sriov_configure = mlx5_core_sriov_configure, }; +static void mlx5_core_verify_params(void) +{ + if (prof_sel >= ARRAY_SIZE(profile)) { + pr_warn("mlx5_core: WARNING: Invalid module parameter prof_sel %d, valid range 0-%zu, changing back to default(%d)\n", + prof_sel, + ARRAY_SIZE(profile) - 1, + MLX5_DEFAULT_PROF); + prof_sel = MLX5_DEFAULT_PROF; + } +} + static int __init init(void) { int err; + mlx5_core_verify_params(); mlx5_register_debugfs(); err = pci_register_driver(&mlx5_core_driver); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 187662c8ea96..20d16b137e03 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -44,7 +44,7 @@ #define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev)) -extern int mlx5_core_debug_mask; +extern uint mlx5_core_debug_mask; #define mlx5_core_dbg(__dev, format, ...) \ dev_dbg(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \ -- cgit v1.2.3 From 9e5b2fc1d39b3122e2028849d0edc5df1d1a4761 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 6 Dec 2016 17:32:44 +0200 Subject: net/mlx5: Remove duplicate pci dev name print Remove duplicate pci dev name printing from mlx5_core_warn/dbg. Fixes: 5a7883989b1c ('net/mlx5_core: Improve mlx5 messages') Signed-off-by: Kamal Heib Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 20d16b137e03..2ce03464e622 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -47,8 +47,8 @@ extern uint mlx5_core_debug_mask; #define mlx5_core_dbg(__dev, format, ...) \ - dev_dbg(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \ - (__dev)->priv.name, __func__, __LINE__, current->pid, \ + dev_dbg(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ ##__VA_ARGS__) #define mlx5_core_dbg_mask(__dev, mask, format, ...) \ @@ -63,8 +63,8 @@ do { \ ##__VA_ARGS__) #define mlx5_core_warn(__dev, format, ...) \ - dev_warn(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \ - (__dev)->priv.name, __func__, __LINE__, current->pid, \ + dev_warn(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ ##__VA_ARGS__) #define mlx5_core_info(__dev, format, ...) \ -- cgit v1.2.3 From f9c14e46748be9a2adafdb7d216f6cdeb435aadc Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 6 Dec 2016 17:32:45 +0200 Subject: net/mlx5: Fix query ISSI flow In old FWs query ISSI command is not supported and for some of those FWs it might fail with status other than "MLX5_CMD_STAT_BAD_OP_ERR". In such case instead of failing the driver load, we will treat any FW status other than 0 for Query ISSI FW command as ISSI not supported and assume ISSI=0 (most basic driver/FW interface). In case of driver syndrom (query ISSI failure by driver) we will fail driver load. Fixes: f62b8bb8f2d3 ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality') Signed-off-by: Kamal Heib Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 5 ----- drivers/net/ethernet/mellanox/mlx5/core/main.c | 15 +++++++++------ drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 5 +++++ 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 1e639f886021..bfe410e8a469 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -268,11 +268,6 @@ static void dump_buf(void *buf, int size, int data_only, int offset) pr_debug("\n"); } -enum { - MLX5_DRIVER_STATUS_ABORTED = 0xfe, - MLX5_DRIVER_SYND = 0xbadd00de, -}; - static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, u32 *synd, u8 *status) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 22eb3be06651..ada24e103b02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -732,13 +732,15 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) u8 status; mlx5_cmd_mbox_status(query_out, &status, &syndrome); - if (status == MLX5_CMD_STAT_BAD_OP_ERR) { - pr_debug("Only ISSI 0 is supported\n"); - return 0; + if (!status || syndrome == MLX5_DRIVER_SYND) { + mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n", + err, status, syndrome); + return err; } - pr_err("failed to query ISSI err(%d)\n", err); - return err; + mlx5_core_warn(dev, "Query ISSI is not supported by FW, ISSI is 0\n"); + dev->issi = 0; + return 0; } sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0); @@ -752,7 +754,8 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) err = mlx5_cmd_exec(dev, set_in, sizeof(set_in), set_out, sizeof(set_out)); if (err) { - pr_err("failed to set ISSI=1 err(%d)\n", err); + mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n", + err); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 2ce03464e622..63b9a0dba885 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -75,6 +75,11 @@ enum { MLX5_CMD_TIME, /* print command execution time */ }; +enum { + MLX5_DRIVER_STATUS_ABORTED = 0xfe, + MLX5_DRIVER_SYND = 0xbadd00de, +}; + int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); -- cgit v1.2.3 From b8335d91b472289939e26428dfa88c54aee3b739 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 6 Dec 2016 17:32:46 +0200 Subject: net/mlx5e: Don't notify HW when filling the edge of ICO SQ We are going to do this a couple of steps ahead anyway. Fixes: d3c9bc2743dc ("net/mlx5e: Added ICO SQs") Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index c6de6fba5843..e9abb6dfe393 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -340,7 +340,7 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; sq->db.ico_wqe[pi].num_wqebbs = 1; - mlx5e_send_nop(sq, true); + mlx5e_send_nop(sq, false); } wqe = mlx5_wq_cyc_get_wqe(wq, pi); -- cgit v1.2.3 From 3c8591d593a3da9ae8e8342acb1f6ab9ab478e92 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 6 Dec 2016 17:32:47 +0200 Subject: net/mlx5e: Don't flush SQ on error We are doing SQ descriptors cleanup in driver. Fixes: 6e8dd6d6f4bd ("net/mlx5e: Don't wait for SQ completions on close") Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 84e8b250e2af..5bf7f86fe31c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1006,7 +1006,6 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 0 : 1); - MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, uar_page, sq->uar.index); -- cgit v1.2.3 From c0f1147d14e4b09018a495c5095094e5707a4f44 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Tue, 6 Dec 2016 17:32:48 +0200 Subject: net/mlx5e: Change the SQ/RQ operational state to positive logic When using the negative logic (i.e. FLUSH state), after the RQ/SQ reopen we will have a time interval that the RQ/SQ is not really ready and the state indicates that its not in FLUSH state because the initial SQ/RQ struct memory starts as zeros. Now we changed the state to indicate if the SQ/RQ is opened and we will set the READY state after finishing preparing all the SQ/RQ resources. Fixes: 6e8dd6d6f4bd ("net/mlx5e: Don't wait for SQ completions on close") Fixes: f2fde18c52a7 ("net/mlx5e: Don't wait for RQ completions on close") Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 14 +++++++++----- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 4 ++-- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 7a43502a89cc..71382df59fc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -241,7 +241,7 @@ struct mlx5e_tstamp { }; enum { - MLX5E_RQ_STATE_FLUSH, + MLX5E_RQ_STATE_ENABLED, MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, MLX5E_RQ_STATE_AM, }; @@ -394,7 +394,7 @@ struct mlx5e_sq_dma { }; enum { - MLX5E_SQ_STATE_FLUSH, + MLX5E_SQ_STATE_ENABLED, MLX5E_SQ_STATE_BF_ENABLE, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5bf7f86fe31c..246d98ebb588 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -759,6 +759,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (err) goto err_destroy_rq; + set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) goto err_disable_rq; @@ -773,6 +774,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, return 0; err_disable_rq: + clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); mlx5e_disable_rq(rq); err_destroy_rq: mlx5e_destroy_rq(rq); @@ -782,7 +784,7 @@ err_destroy_rq: static void mlx5e_close_rq(struct mlx5e_rq *rq) { - set_bit(MLX5E_RQ_STATE_FLUSH, &rq->state); + clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ cancel_work_sync(&rq->am.work); @@ -1082,6 +1084,7 @@ static int mlx5e_open_sq(struct mlx5e_channel *c, if (err) goto err_destroy_sq; + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY, false, 0); if (err) @@ -1095,6 +1098,7 @@ static int mlx5e_open_sq(struct mlx5e_channel *c, return 0; err_disable_sq: + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); mlx5e_disable_sq(sq); err_destroy_sq: mlx5e_destroy_sq(sq); @@ -1111,7 +1115,7 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq) static void mlx5e_close_sq(struct mlx5e_sq *sq) { - set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state); + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); /* prevent netif_tx_wake_queue */ napi_synchronize(&sq->channel->napi); @@ -3091,7 +3095,7 @@ static void mlx5e_tx_timeout(struct net_device *dev) if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i))) continue; sched_work = true; - set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state); + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n", i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc); } @@ -3146,13 +3150,13 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) for (i = 0; i < priv->params.num_channels; i++) { struct mlx5e_channel *c = priv->channel[i]; - set_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state); + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); napi_synchronize(&c->napi); /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */ old_prog = xchg(&c->rq.xdp_prog, prog); - clear_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state); + set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); /* napi_schedule in case we have missed anything */ set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); napi_schedule(&c->napi); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index e9abb6dfe393..33495d88aeb2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -412,7 +412,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); - if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) { + if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) { mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); return; } @@ -445,7 +445,7 @@ void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) } #define RQ_CANNOT_POST(rq) \ - (test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state) || \ + (!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state) || \ test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) @@ -924,7 +924,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq; int work_done = 0; - if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) + if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) return 0; if (cq->decmprs_left) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 70a717382357..cfb68371c397 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -409,7 +409,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) sq = container_of(cq, struct mlx5e_sq, cq); - if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) return false; npkts = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 5703f19a6a24..e5c12a732aa1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -56,7 +56,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) struct mlx5_cqe64 *cqe; u16 sqcc; - if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) return; cqe = mlx5e_get_cqe(cq); @@ -113,7 +113,7 @@ static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq) sq = container_of(cq, struct mlx5e_sq, cq); - if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) return false; /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), -- cgit v1.2.3 From 10d20bd25e06b220b1d816228b036e367215dc60 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 5 Dec 2016 12:10:29 -0800 Subject: shmem: fix shm fallocate() list corruption The shmem hole punching with fallocate(FALLOC_FL_PUNCH_HOLE) does not want to race with generating new pages by faulting them in. However, the wait-queue used to delay the page faulting has a serious problem: the wait queue head (in shmem_fallocate()) is allocated on the stack, and the code expects that "wake_up_all()" will make sure that all the queue entries are gone before the stack frame is de-allocated. And that is not at all necessarily the case. Yes, a normal wake-up sequence will remove the wait-queue entry that caused the wakeup (see "autoremove_wake_function()"), but the key wording there is "that caused the wakeup". When there are multiple possible wakeup sources, the wait queue entry may well stay around. And _particularly_ in a page fault path, we may be faulting in new pages from user space while we also have other things going on, and there may well be other pending wakeups. So despite the "wake_up_all()", it's not at all guaranteed that all list entries are removed from the wait queue head on the stack. Fix this by introducing a new wakeup function that removes the list entry unconditionally, even if the target process had already woken up for other reasons. Use that "synchronous" function to set up the waiters in shmem_fault(). This problem has never been seen in the wild afaik, but Dave Jones has reported it on and off while running trinity. We thought we fixed the stack corruption with the blk-mq rq_list locking fix (commit 7fe311302f7d: "blk-mq: update hardware and software queues for sleeping alloc"), but it turns out there was _another_ stack corruptor hiding in the trinity runs. Vegard Nossum (also running trinity) was able to trigger this one fairly consistently, and made us look once again at the shmem code due to the faults often being in that area. Reported-and-tested-by: Vegard Nossum . Reported-by: Dave Jones Signed-off-by: Linus Torvalds --- mm/shmem.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index 166ebf5d2bce..9d32e1cb9f38 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1848,6 +1848,18 @@ unlock: return error; } +/* + * This is like autoremove_wake_function, but it removes the wait queue + * entry unconditionally - even if something else had already woken the + * target. + */ +static int synchronous_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + int ret = default_wake_function(wait, mode, sync, key); + list_del_init(&wait->task_list); + return ret; +} + static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct inode *inode = file_inode(vma->vm_file); @@ -1883,7 +1895,7 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) vmf->pgoff >= shmem_falloc->start && vmf->pgoff < shmem_falloc->next) { wait_queue_head_t *shmem_falloc_waitq; - DEFINE_WAIT(shmem_fault_wait); + DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function); ret = VM_FAULT_NOPAGE; if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) && @@ -2665,6 +2677,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, spin_lock(&inode->i_lock); inode->i_private = NULL; wake_up_all(&shmem_falloc_waitq); + WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.task_list)); spin_unlock(&inode->i_lock); error = 0; goto out; -- cgit v1.2.3 From 163117e8d4fd7a235ec48479e31bbda0c74eff56 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 1 Dec 2016 08:48:30 +0300 Subject: dbri: move dereference after check for NULL We accidentally introduced a dereference before the NULL check in xmit_descs() as part of silencing a GCC warning. Fixes: 16f46050e709 ("dbri: Fix compiler warning") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- sound/sparc/dbri.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 3fe4468ea2c5..52063b262667 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -1702,7 +1702,7 @@ interrupts are disabled. static void xmit_descs(struct snd_dbri *dbri) { struct dbri_streaminfo *info; - u32 dvma_addr = (u32)dbri->dma_dvma; + u32 dvma_addr; s32 *cmd; unsigned long flags; int first_td; @@ -1710,6 +1710,7 @@ static void xmit_descs(struct snd_dbri *dbri) if (dbri == NULL) return; /* Disabled */ + dvma_addr = (u32)dbri->dma_dvma; info = &dbri->stream_info[DBRI_REC]; spin_lock_irqsave(&dbri->lock, flags); -- cgit v1.2.3