From 3b2c1710fac7fb278b760d1545e637cbb5ea5b5b Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 13 Jul 2016 16:32:03 +0300 Subject: drm/i915: Wait up to 3ms for the pcu to ack the cdclk change request on SKL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bspec tells us to keep bashing the PCU for up to 3ms when trying to inform it about an upcoming change in the cdclk frequency. Currently we only keep at it for 15*10usec (+ whatever delays gets added by the sandybridge_pcode_read() itself). Let's change the limit to 3ms. I decided to keep 10 usec delay per iteration for now, even though the spec doesn't really tell us to do that. Cc: stable@vger.kernel.org Fixes: 5d96d8afcfbb ("drm/i915/skl: Deinit/init the display at suspend/resume") Cc: David Weinehall Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1468416723-23440-1-git-send-email-ville.syrjala@linux.intel.com Tested-by: David Weinehall Reviewed-by: Chris Wilson (cherry picked from commit 848496e5902833600f7992f4faa82dc1546051ba) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9f56ac11975f..e5c46a6ec65b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5691,15 +5691,7 @@ static bool skl_cdclk_pcu_ready(struct drm_i915_private *dev_priv) static bool skl_cdclk_wait_for_pcu_ready(struct drm_i915_private *dev_priv) { - unsigned int i; - - for (i = 0; i < 15; i++) { - if (skl_cdclk_pcu_ready(dev_priv)) - return true; - udelay(10); - } - - return false; + return _wait_for(skl_cdclk_pcu_ready(dev_priv), 3000, 10) == 0; } static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco) -- cgit v1.2.3 From a7b4667a00025ac28300737c868bd4818b6d8c4d Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 23 May 2016 17:42:48 +0300 Subject: drm/i915: Never fully mask the the EI up rps interrupt on SNB/IVB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SNB (and IVB too I suppose) starts to misbehave if the GPU gets stuck in an infinite batch buffer loop. The GPU apparently hogs something critical and CPUs start to lose interrupts and whatnot. We can keep the system limping along by unmasking some interrupts in GEN6_PMINTRMSK. The EI up interrupt has been previously chosen for that task, so let's never mask it. v2: s/gen6_rps_pm_mask/gen6_sanitize_rps_pm_mask/ (Chris) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93122 Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1464014568-4529-1-git-send-email-ville.syrjala@linux.intel.com Cc: stable@vger.kernel.org (cherry picked from commit 12c100bfa5d9103b6c4d43636fee09c31e75605a) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5a8ee0c76593..5ca68d2ed7d4 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4892,7 +4892,8 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) else gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); dev_priv->rps.last_adj = 0; - I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); + I915_WRITE(GEN6_PMINTRMSK, + gen6_sanitize_rps_pm_mask(dev_priv, ~0)); } mutex_unlock(&dev_priv->rps.hw_lock); -- cgit v1.2.3 From f005bd7e3b84a353475a2895e2c7686a66297d87 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Aug 2016 10:54:15 +0100 Subject: clocksource/arm_arch_timer: Force per-CPU interrupt to be level-triggered The ARM architected timer produces level-triggered interrupts (this is mandated by the architecture). Unfortunately, a number of device-trees get this wrong, and expose an edge-triggered interrupt. Until now, this wasn't too much an issue, as the programming of the trigger would fail (the corresponding PPI cannot be reconfigured), and the kernel would be happy with this. But we're about to change this, and trust DT a lot if the driver doesn't provide its own trigger information. In that context, the timer breaks badly. While we do need to fix the DTs, there is also some userspace out there (kvmtool) that generates the same kind of broken DT on the fly, and that will completely break with newer kernels. As a safety measure, and to keep buggy software alive as well as buying us some time to fix DTs all over the place, let's check what trigger configuration has been given us by the firmware. If this is not a level configuration, then we know that the DT/ACPI configuration is bust, and we pick some defaults which won't be worse than the existing setup. Signed-off-by: Marc Zyngier Cc: Andrew Lunn Cc: Liu Gang Cc: Mark Rutland Cc: Masahiro Yamada Cc: Wenbin Song Cc: Mingkai Hu Cc: Florian Fainelli Cc: Kevin Hilman Cc: Daniel Lezcano Cc: Michal Simek Cc: Jon Hunter Cc: arm@kernel.org Cc: bcm-kernel-feedback-list@broadcom.com Cc: linux-arm-kernel@lists.infradead.org Cc: Sebastian Hesselbarth Cc: Jason Cooper Cc: Ray Jui Cc: "Hou Zhiqiang" Cc: Tirumalesh Chalamarla Cc: linux-samsung-soc@vger.kernel.org Cc: Yuan Yao Cc: Jan Glauber Cc: Gregory Clement Cc: linux-amlogic@lists.infradead.org Cc: soren.brinkmann@xilinx.com Cc: Rajesh Bhagat Cc: Scott Branden Cc: Duc Dang Cc: Kukjin Kim Cc: Carlo Caione Cc: Dinh Nguyen Link: http://lkml.kernel.org/r/1470045256-9032-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/clocksource/arm_arch_timer.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 28bce3f4f81d..57700541f951 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -8,6 +8,9 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + +#define pr_fmt(fmt) "arm_arch_timer: " fmt + #include #include #include @@ -370,16 +373,33 @@ static bool arch_timer_has_nonsecure_ppi(void) arch_timer_ppi[PHYS_NONSECURE_PPI]); } +static u32 check_ppi_trigger(int irq) +{ + u32 flags = irq_get_trigger_type(irq); + + if (flags != IRQF_TRIGGER_HIGH && flags != IRQF_TRIGGER_LOW) { + pr_warn("WARNING: Invalid trigger for IRQ%d, assuming level low\n", irq); + pr_warn("WARNING: Please fix your firmware\n"); + flags = IRQF_TRIGGER_LOW; + } + + return flags; +} + static int arch_timer_starting_cpu(unsigned int cpu) { struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt); + u32 flags; __arch_timer_setup(ARCH_CP15_TIMER, clk); - enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], 0); + flags = check_ppi_trigger(arch_timer_ppi[arch_timer_uses_ppi]); + enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], flags); - if (arch_timer_has_nonsecure_ppi()) - enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0); + if (arch_timer_has_nonsecure_ppi()) { + flags = check_ppi_trigger(arch_timer_ppi[PHYS_NONSECURE_PPI]); + enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], flags); + } arch_counter_set_user_access(); if (evtstrm_enable) -- cgit v1.2.3 From 7f555c8e5a84b348c2b76f4ca78eae7222354c03 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 29 Jul 2016 18:03:42 -0400 Subject: drm/amdgpu/gmc7: add missing mullins case Looks like this got missed when we ported the code from radeon. Reviewed-by: Edward O'Callaghan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index d24a82bd0c7a..0b0f08641eed 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -144,6 +144,7 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev) break; case CHIP_KAVERI: case CHIP_KABINI: + case CHIP_MULLINS: return 0; default: BUG(); } -- cgit v1.2.3 From 903f75c8958e21f0fba2520467c11eaca824226d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 29 Jul 2016 18:14:39 -0400 Subject: drm/amdgpu/ci: add mullins to default case for smc ucode It's already covered by the default case, but add it for consistency. Reviewed-by: Alexandre Demers Reviewed-by: Edward O'Callaghan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index e2f0e5d58d5c..a5c94b482459 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -5779,6 +5779,7 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev) break; case CHIP_KAVERI: case CHIP_KABINI: + case CHIP_MULLINS: default: BUG(); } -- cgit v1.2.3 From 4f7ec157cb6dfb1e2cd18abf21e6cb1107842c5f Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Thu, 28 Jul 2016 17:25:01 -0400 Subject: drm/amd/amdgpu: change pptable output format from ASCII to binary Reviewed-by: Edward O'Callaghan Reviewed-by: Alex Deucher Signed-off-by: Eric Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index ff63b88b0ffa..5cc7052e391d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -305,7 +305,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; char *table = NULL; - int size, i; + int size; if (adev->pp_enabled) size = amdgpu_dpm_get_pp_table(adev, &table); @@ -315,10 +315,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, if (size >= PAGE_SIZE) size = PAGE_SIZE - 1; - for (i = 0; i < size; i++) { - sprintf(buf + i, "%02x", table[i]); - } - sprintf(buf + i, "\n"); + memcpy(buf, table, size); return size; } -- cgit v1.2.3 From 300108740b659c2380a731f147dd85ca0365db4f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 19 Jul 2016 13:04:14 -0700 Subject: lkdtm: fix false positive warning from -Wmaybe-uninitialized The variable in use here doesn't matter (it's just used to exercise taking up stack space), but this changes its use to pass its address instead, to avoid a compiler warning: drivers/misc/lkdtm_usercopy.c:54:15: warning: 'bad_stack' may be used uninitialized in this function [-Wmaybe-uninitialized] Reported-by: Arnd Bergmann Signed-off-by: Kees Cook --- drivers/misc/lkdtm_usercopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/misc/lkdtm_usercopy.c b/drivers/misc/lkdtm_usercopy.c index 5a3fd76eec27..5525a204db93 100644 --- a/drivers/misc/lkdtm_usercopy.c +++ b/drivers/misc/lkdtm_usercopy.c @@ -49,7 +49,7 @@ static noinline void do_usercopy_stack(bool to_user, bool bad_frame) /* This is a pointer to outside our current stack frame. */ if (bad_frame) { - bad_stack = do_usercopy_stack_callee((uintptr_t)bad_stack); + bad_stack = do_usercopy_stack_callee((uintptr_t)&bad_stack); } else { /* Put start address just inside stack. */ bad_stack = task_stack_page(current) + THREAD_SIZE; -- cgit v1.2.3 From e50bd2354ced2018eff1c7e06e7db4db1f5ce745 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 1 Aug 2016 14:18:34 -0700 Subject: lkdtm: Fix targets for objcopy usage The targets for lkdtm's objcopy were missing which caused them to always be rebuilt. This corrects the problem. Reported-by: Linus Torvalds Signed-off-by: Kees Cook --- drivers/misc/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 4387ccb79e64..7410c6d9a34d 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -69,5 +69,6 @@ OBJCOPYFLAGS := OBJCOPYFLAGS_lkdtm_rodata_objcopy.o := \ --set-section-flags .text=alloc,readonly \ --rename-section .text=.rodata -$(obj)/lkdtm_rodata_objcopy.o: $(obj)/lkdtm_rodata.o +targets += lkdtm_rodata.o lkdtm_rodata_objcopy.o +$(obj)/lkdtm_rodata_objcopy.o: $(obj)/lkdtm_rodata.o FORCE $(call if_changed,objcopy) -- cgit v1.2.3 From fe85f07f582751da0a65d4ba7e3b36ebcbe55019 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 2 Aug 2016 11:21:35 +0800 Subject: drm/amdgpu: update golden setting of iceland Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index bff8668e9e6d..a4983237baa0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -409,6 +409,7 @@ static const u32 golden_settings_iceland_a11[] = mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002, mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, + mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, mmTCC_CTRL, 0x00100000, 0xf31fff7f, -- cgit v1.2.3 From 3a494b588f724fde00ae3ca0aa1c7e6cef1d0d51 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 2 Aug 2016 11:28:47 +0800 Subject: drm/amdgpu: update golden setting of carrizo Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index a4983237baa0..85854ba2cf0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -506,8 +506,10 @@ static const u32 cz_golden_settings_a11[] = mmGB_GPU_ID, 0x0000000f, 0x00000000, mmPA_SC_ENHANCE, 0xffffffff, 0x00000001, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, + mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, mmTA_CNTL_AUX, 0x000f000f, 0x00010000, + mmTCC_CTRL, 0x00100000, 0xf31fff7f, mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302 -- cgit v1.2.3 From 9761bc53c8e1c52f3f0aa4334f38fd87b0cf1613 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 2 Aug 2016 11:26:34 +0800 Subject: drm/amdgpu: update golden setting of polaris11 Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Reviewed-by: Ken Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 85854ba2cf0c..3b7f6e87d244 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -270,7 +270,8 @@ static const u32 tonga_mgcg_cgcg_init[] = static const u32 golden_settings_polaris11_a11[] = { - mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208, + mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208, + mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000, mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, mmDB_DEBUG2, 0xf00fffff, 0x00000400, mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, @@ -279,7 +280,7 @@ static const u32 golden_settings_polaris11_a11[] = mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, - mmSQ_CONFIG, 0x07f80000, 0x07180000, + mmSQ_CONFIG, 0x07f80000, 0x01180000, mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, mmTCC_CTRL, 0x00100000, 0xf31fff7f, mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3, -- cgit v1.2.3 From 6d51c813b172b4374fe7a6b732b6666f8d77bfea Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 2 Aug 2016 11:30:05 +0800 Subject: drm/amdgpu: update golden setting of stoney Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 717359d3ba8c..2aee2c6f3cd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -103,6 +103,11 @@ static const u32 stoney_mgcg_cgcg_init[] = mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 }; +static const u32 golden_settings_stoney_common[] = +{ + mmMC_HUB_RDREQ_UVD, MC_HUB_RDREQ_UVD__PRESCALE_MASK, 0x00000004, + mmMC_RD_GRP_OTH, MC_RD_GRP_OTH__UVD_MASK, 0x00600000 +}; static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) { @@ -142,6 +147,9 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) amdgpu_program_register_sequence(adev, stoney_mgcg_cgcg_init, (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init)); + amdgpu_program_register_sequence(adev, + golden_settings_stoney_common, + (const u32)ARRAY_SIZE(golden_settings_stoney_common)); break; default: break; -- cgit v1.2.3 From a5a5e3084ec457f234298392dfa2c55db47b603f Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 2 Aug 2016 13:15:12 +0800 Subject: drm/amdgpu: update golden setting of polaris10 Signed-off-by: Huang Rui Reviewed-by: Ken Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 3b7f6e87d244..b8184617ca25 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -302,8 +302,8 @@ static const u32 polaris11_golden_common_all[] = static const u32 golden_settings_polaris10_a11[] = { mmATC_MISC_CG, 0x000c0fc0, 0x000c0200, - mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208, - mmCB_HW_CONTROL_2, 0, 0x0f000000, + mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208, + mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000, mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, mmDB_DEBUG2, 0xf00fffff, 0x00000400, mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, -- cgit v1.2.3 From 0b857b44b5e445dc850cd91c45ce6edeb7797480 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sun, 31 Jul 2016 00:27:39 -0700 Subject: nvme-rdma: Don't leak uninitialized memory in connect request private data Zero out the full nvme_rdma_cm_req structure before sending it. Otherwise we end up leaking kernel memory in the reserved field, which might break forward compatibility in the future. Signed-off-by: Roland Dreier Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3e3ce2b0424e..b96b88369871 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1269,7 +1269,7 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue) { struct nvme_rdma_ctrl *ctrl = queue->ctrl; struct rdma_conn_param param = { }; - struct nvme_rdma_cm_req priv; + struct nvme_rdma_cm_req priv = { }; int ret; param.qp_num = queue->qp->qp_num; -- cgit v1.2.3 From 5f372eb3e76317b4fe4ba53ad1547f39fc883350 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 31 Jul 2016 18:43:15 +0300 Subject: nvme-rdma: Queue ns scanning after a sucessful reconnection On an ordered target shutdown, the target can send a AEN on a namespace removal, this will trigger the host to queue ns-list query. The shutdown will trigger error recovery which will attepmt periodic reconnect. We can hit a race where the ns rescanning fails (error recovery kicked in and we're not connected) causing removing all the namespaces and when we reconnect we won't see any namespaces for this controller. So, queue a namespace rescan after we successfully reconnected to the target. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index b96b88369871..7fd1d735ced5 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -748,8 +748,10 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); - if (ctrl->queue_count > 1) + if (ctrl->queue_count > 1) { nvme_start_queues(&ctrl->ctrl); + nvme_queue_scan(&ctrl->ctrl); + } dev_info(ctrl->ctrl.device, "Successfully reconnected\n"); -- cgit v1.2.3 From 57de5a0a40db97bb390d3ac1f4c2e74b9f3515c3 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 14 Jul 2016 17:39:47 +0300 Subject: nvme-rdma: Fix device removal handling Device removal sequence may have crashed because the controller (and admin queue space) was freed before we destroyed the admin queue resources. Thus we want to destroy the admin queue and only then queue controller deletion and wait for it to complete. More specifically we: 1. own the controller deletion (make sure we are not competing with another deletion). 2. get rid of inflight reconnects if exists (which also destroy and create queues). 3. destroy the queue. 4. safely queue controller deletion (and wait for it to complete). Reported-by: Steve Wise Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 7fd1d735ced5..3ffec3728abe 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -169,7 +169,6 @@ MODULE_PARM_DESC(register_always, static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); -static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl); /* XXX: really should move to a generic header sooner or later.. */ static inline void put_unaligned_le24(u32 val, u8 *p) @@ -1320,37 +1319,39 @@ out_destroy_queue_ib: * that caught the event. Since we hold the callout until the controller * deletion is completed, we'll deadlock if the controller deletion will * call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership - * of destroying this queue before-hand, destroy the queue resources - * after the controller deletion completed with the exception of destroying - * the cm_id implicitely by returning a non-zero rc to the callout. + * of destroying this queue before-hand, destroy the queue resources, + * then queue the controller deletion which won't destroy this queue and + * we destroy the cm_id implicitely by returning a non-zero rc to the callout. */ static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) { struct nvme_rdma_ctrl *ctrl = queue->ctrl; - int ret, ctrl_deleted = 0; + int ret; - /* First disable the queue so ctrl delete won't free it */ - if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) - goto out; + /* Own the controller deletion */ + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) + return 0; - /* delete the controller */ - ret = __nvme_rdma_del_ctrl(ctrl); - if (!ret) { - dev_warn(ctrl->ctrl.device, - "Got rdma device removal event, deleting ctrl\n"); - flush_work(&ctrl->delete_work); + dev_warn(ctrl->ctrl.device, + "Got rdma device removal event, deleting ctrl\n"); - /* Return non-zero so the cm_id will destroy implicitly */ - ctrl_deleted = 1; + /* Get rid of reconnect work if its running */ + cancel_delayed_work_sync(&ctrl->reconnect_work); + /* Disable the queue so ctrl delete won't free it */ + if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) { /* Free this queue ourselves */ - rdma_disconnect(queue->cm_id); - ib_drain_qp(queue->qp); + nvme_rdma_stop_queue(queue); nvme_rdma_destroy_queue_ib(queue); + + /* Return non-zero so the cm_id will destroy implicitly */ + ret = 1; } -out: - return ctrl_deleted; + /* Queue controller deletion */ + queue_work(nvme_rdma_wq, &ctrl->delete_work); + flush_work(&ctrl->delete_work); + return ret; } static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, -- cgit v1.2.3 From 2461a8dd38bea3cb5b1c1f0323794483292fb03f Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 24 Jul 2016 09:29:51 +0300 Subject: nvme-rdma: Remove duplicate call to nvme_remove_namespaces nvme_uninit_ctrl already does that for us. Note that we reordered nvme_rdma_shutdown_ctrl and nvme_uninit_ctrl, this is perfectly fine because we actually want ctrl uninit (aen, scan cancellation and namespaces removal) to happen before we shutdown the rdma resources. Also, centralize the deletion work and the dead controller removal work code duplication into __nvme_rdma_shutdown_ctrl that accepts a shutdown boolean. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3ffec3728abe..1279bc292253 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1660,15 +1660,20 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) nvme_rdma_destroy_admin_queue(ctrl); } +static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) +{ + nvme_uninit_ctrl(&ctrl->ctrl); + if (shutdown) + nvme_rdma_shutdown_ctrl(ctrl); + nvme_put_ctrl(&ctrl->ctrl); +} + static void nvme_rdma_del_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, delete_work); - nvme_remove_namespaces(&ctrl->ctrl); - nvme_rdma_shutdown_ctrl(ctrl); - nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); + __nvme_rdma_remove_ctrl(ctrl, true); } static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl) @@ -1701,9 +1706,7 @@ static void nvme_rdma_remove_ctrl_work(struct work_struct *work) struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, delete_work); - nvme_remove_namespaces(&ctrl->ctrl); - nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); + __nvme_rdma_remove_ctrl(ctrl, false); } static void nvme_rdma_reset_ctrl_work(struct work_struct *work) -- cgit v1.2.3 From a34ca17a9717fe607cd58285a1704cb6526cf561 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 24 Jul 2016 09:22:19 +0300 Subject: nvme-rdma: Free the I/O tags when we delete the controller If we wait until we free the controller (free_ctrl) we might lose our rdma device without any notification while we still have open resources (tags mrs and dma mappings). Instead, destroy the tags with their rdma resources once we delete the device and not when freeing it. Note that we don't do that in nvme_rdma_shutdown_ctrl because controller reset uses it as well and we want to give active I/O a chance to complete successfully. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 1279bc292253..6378dc94aeaf 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -686,11 +686,6 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) list_del(&ctrl->list); mutex_unlock(&nvme_rdma_ctrl_mutex); - if (ctrl->ctrl.tagset) { - blk_cleanup_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - nvme_rdma_dev_put(ctrl->device); - } kfree(ctrl->queues); nvmf_free_options(nctrl->opts); free_ctrl: @@ -1665,6 +1660,13 @@ static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) nvme_uninit_ctrl(&ctrl->ctrl); if (shutdown) nvme_rdma_shutdown_ctrl(ctrl); + + if (ctrl->ctrl.tagset) { + blk_cleanup_queue(ctrl->ctrl.connect_q); + blk_mq_free_tag_set(&ctrl->tag_set); + nvme_rdma_dev_put(ctrl->device); + } + nvme_put_ctrl(&ctrl->ctrl); } -- cgit v1.2.3 From a159c64d936eb0d1da29d8ad384183d8984899c9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 24 Jul 2016 09:32:08 +0300 Subject: nvme-loop: Remove duplicate call to nvme_remove_namespaces nvme_uninit_ctrl already does that for us. Note that we reordered nvme_loop_shutdown_ctrl with nvme_uninit_ctrl but its safe because we want controller uninit to happen before we shutdown the transport resources. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/target/loop.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 94e782987cc9..7affd40a6b33 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -414,9 +414,8 @@ static void nvme_loop_del_ctrl_work(struct work_struct *work) struct nvme_loop_ctrl *ctrl = container_of(work, struct nvme_loop_ctrl, delete_work); - nvme_remove_namespaces(&ctrl->ctrl); - nvme_loop_shutdown_ctrl(ctrl); nvme_uninit_ctrl(&ctrl->ctrl); + nvme_loop_shutdown_ctrl(ctrl); nvme_put_ctrl(&ctrl->ctrl); } @@ -501,7 +500,6 @@ out_free_queues: nvme_loop_destroy_admin_queue(ctrl); out_disable: dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); - nvme_remove_namespaces(&ctrl->ctrl); nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); } -- cgit v1.2.3 From 45862ebcc4883b1b6bc0701cd15cb2b68b140c5d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 24 Jul 2016 09:26:16 +0300 Subject: nvme-rdma: Make sure to shutdown the controller if we can Relying on ctrl state in nvme_rdma_shutdown_ctrl is wrong because it will never be NVME_CTRL_LIVE (delete_ctrl or reset_ctrl invoked it). Instead, check that the admin queue is connected. Note that it is safe because we can never see a copmeting thread trying to destroy the admin queue (reset or delete controller). Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 6378dc94aeaf..f4b836869d2c 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1646,7 +1646,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) nvme_rdma_free_io_queues(ctrl); } - if (ctrl->ctrl.state == NVME_CTRL_LIVE) + if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags)) nvme_shutdown_ctrl(&ctrl->ctrl); blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); -- cgit v1.2.3 From c45eb4fed12d278d3619f1904885bd0d7bcbf036 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Jul 2016 18:34:45 +0100 Subject: drm/i915/fbdev: Check for the framebuffer before use If the fbdev probing fails, and in our error path we fail to clear the dev_priv->fbdev, then we can try and use a dangling fbdev pointer, and in particular a NULL fb. This could also happen in pathological cases where we try to operate on the fbdev prior to it being probed. Reported-by: Maarten Lankhorst Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1468431285-28264-2-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter Reviewed-by: Mika Kuoppala (cherry picked from commit 6bc265424df02f8162f4a17a37e4982e1c64460e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_fbdev.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 86b00c6db1a6..3e3632c18733 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -782,7 +782,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous struct intel_fbdev *ifbdev = dev_priv->fbdev; struct fb_info *info; - if (!ifbdev) + if (!ifbdev || !ifbdev->fb) return; info = ifbdev->helper.fbdev; @@ -827,31 +827,28 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous void intel_fbdev_output_poll_changed(struct drm_device *dev) { - struct drm_i915_private *dev_priv = to_i915(dev); - if (dev_priv->fbdev) - drm_fb_helper_hotplug_event(&dev_priv->fbdev->helper); + struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; + + if (ifbdev && ifbdev->fb) + drm_fb_helper_hotplug_event(&ifbdev->helper); } void intel_fbdev_restore_mode(struct drm_device *dev) { - int ret; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_fbdev *ifbdev = dev_priv->fbdev; - struct drm_fb_helper *fb_helper; + struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; if (!ifbdev) return; intel_fbdev_sync(ifbdev); + if (!ifbdev->fb) + return; - fb_helper = &ifbdev->helper; - - ret = drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper); - if (ret) { + if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper)) { DRM_DEBUG("failed to restore crtc mode\n"); } else { - mutex_lock(&fb_helper->dev->struct_mutex); + mutex_lock(&dev->struct_mutex); intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); + mutex_unlock(&dev->struct_mutex); } } -- cgit v1.2.3 From d8f7750a08968b105056328652d2c332bdfa062d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 19 May 2016 15:24:55 +0300 Subject: nvmet-rdma: Correctly handle RDMA device hot removal When configuring a device attached listener, we may see device removal events. In this case we return a non-zero return code from the cm event handler which implicitly destroys the cm_id. It is possible that in the future the user will remove this listener and by that trigger a second call to rdma_destroy_id on an already destroyed cm_id -> BUG. In addition, when a queue bound (active session) cm_id generates a DEVICE_REMOVAL event we must guarantee all resources are cleaned up by the time we return from the event handler. Introduce nvmet_rdma_device_removal which addresses (or at least attempts to) both scenarios. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 87 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 70 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index e06d504bdf0c..48c811850c29 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -77,6 +77,7 @@ enum nvmet_rdma_queue_state { NVMET_RDMA_Q_CONNECTING, NVMET_RDMA_Q_LIVE, NVMET_RDMA_Q_DISCONNECTING, + NVMET_RDMA_IN_DEVICE_REMOVAL, }; struct nvmet_rdma_queue { @@ -984,7 +985,10 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w) struct nvmet_rdma_device *dev = queue->dev; nvmet_rdma_free_queue(queue); - rdma_destroy_id(cm_id); + + if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL) + rdma_destroy_id(cm_id); + kref_put(&dev->ref, nvmet_rdma_free_dev); } @@ -1233,8 +1237,9 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) switch (queue->state) { case NVMET_RDMA_Q_CONNECTING: case NVMET_RDMA_Q_LIVE: - disconnect = true; queue->state = NVMET_RDMA_Q_DISCONNECTING; + case NVMET_RDMA_IN_DEVICE_REMOVAL: + disconnect = true; break; case NVMET_RDMA_Q_DISCONNECTING: break; @@ -1272,6 +1277,62 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, schedule_work(&queue->release_work); } +/** + * nvme_rdma_device_removal() - Handle RDMA device removal + * @queue: nvmet rdma queue (cm id qp_context) + * @addr: nvmet address (cm_id context) + * + * DEVICE_REMOVAL event notifies us that the RDMA device is about + * to unplug so we should take care of destroying our RDMA resources. + * This event will be generated for each allocated cm_id. + * + * Note that this event can be generated on a normal queue cm_id + * and/or a device bound listener cm_id (where in this case + * queue will be null). + * + * we claim ownership on destroying the cm_id. For queues we move + * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port + * we nullify the priv to prevent double cm_id destruction and destroying + * the cm_id implicitely by returning a non-zero rc to the callout. + */ +static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, + struct nvmet_rdma_queue *queue) +{ + unsigned long flags; + + if (!queue) { + struct nvmet_port *port = cm_id->context; + + /* + * This is a listener cm_id. Make sure that + * future remove_port won't invoke a double + * cm_id destroy. use atomic xchg to make sure + * we don't compete with remove_port. + */ + if (xchg(&port->priv, NULL) != cm_id) + return 0; + } else { + /* + * This is a queue cm_id. Make sure that + * release queue will not destroy the cm_id + * and schedule all ctrl queues removal (only + * if the queue is not disconnecting already). + */ + spin_lock_irqsave(&queue->state_lock, flags); + if (queue->state != NVMET_RDMA_Q_DISCONNECTING) + queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL; + spin_unlock_irqrestore(&queue->state_lock, flags); + nvmet_rdma_queue_disconnect(queue); + flush_scheduled_work(); + } + + /* + * We need to return 1 so that the core will destroy + * it's own ID. What a great API design.. + */ + return 1; +} + static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { @@ -1294,20 +1355,11 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_ADDR_CHANGE: case RDMA_CM_EVENT_DISCONNECTED: - case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_TIMEWAIT_EXIT: - /* - * We can get the device removal callback even for a - * CM ID that we aren't actually using. In that case - * the context pointer is NULL, so we shouldn't try - * to disconnect a non-existing queue. But we also - * need to return 1 so that the core will destroy - * it's own ID. What a great API design.. - */ - if (queue) - nvmet_rdma_queue_disconnect(queue); - else - ret = 1; + nvmet_rdma_queue_disconnect(queue); + break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + ret = nvmet_rdma_device_removal(cm_id, queue); break; case RDMA_CM_EVENT_REJECTED: case RDMA_CM_EVENT_UNREACHABLE: @@ -1396,9 +1448,10 @@ out_destroy_id: static void nvmet_rdma_remove_port(struct nvmet_port *port) { - struct rdma_cm_id *cm_id = port->priv; + struct rdma_cm_id *cm_id = xchg(&port->priv, NULL); - rdma_destroy_id(cm_id); + if (cm_id) + rdma_destroy_id(cm_id); } static struct nvmet_fabrics_ops nvmet_rdma_ops = { -- cgit v1.2.3 From 40e64e07213201710a51e270595d6e6c028f9502 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 28 Jul 2016 18:04:09 +0300 Subject: nvmet-rdma: Don't use the inline buffer in order to avoid allocation for small reads Under extreme conditions this might cause data corruptions. By doing that we we repost the buffer and then post this buffer for the device to send. If we happen to use shared receive queues the device might write to the buffer before it sends it (there is no ordering between send and recv queues). Without SRQs we probably won't get that if the host doesn't mis-behave and send more than we allowed it, but relying on that is not really a good idea. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 48c811850c29..b4d648536c3e 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -616,15 +616,10 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, if (!len) return 0; - /* use the already allocated data buffer if possible */ - if (len <= NVMET_RDMA_INLINE_DATA_SIZE && rsp->queue->host_qid) { - nvmet_rdma_use_inline_sg(rsp, len, 0); - } else { - status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt, - len); - if (status) - return status; - } + status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt, + len); + if (status) + return status; ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, rsp->req.sg, rsp->req.sg_cnt, 0, addr, key, -- cgit v1.2.3 From 28b89118539da03f4b188763e1b2fd1aec0f580a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 4 Aug 2016 11:18:49 +0300 Subject: nvmet: Fix controller serial number inconsistency The host is allowed to issue identify as many times as it wants, we need to stay consistent when reporting the serial number for a given controller. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 6 +----- drivers/nvme/target/core.c | 4 ++++ drivers/nvme/target/nvmet.h | 1 + 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 2fac17a5ad53..47c564b5a289 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -13,7 +13,6 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include "nvmet.h" @@ -83,7 +82,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvme_id_ctrl *id; - u64 serial; u16 status = 0; id = kzalloc(sizeof(*id), GFP_KERNEL); @@ -96,10 +94,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) id->vid = 0; id->ssvid = 0; - /* generate a random serial number as our controllers are ephemeral: */ - get_random_bytes(&serial, sizeof(serial)); memset(id->sn, ' ', sizeof(id->sn)); - snprintf(id->sn, sizeof(id->sn), "%llx", serial); + snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->serial); memset(id->mn, ' ', sizeof(id->mn)); strncpy((char *)id->mn, "Linux", sizeof(id->mn)); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 8a891ca53367..6559d5afa7bf 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -13,6 +13,7 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include "nvmet.h" static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; @@ -728,6 +729,9 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE); memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE); + /* generate a random serial number as our controllers are ephemeral: */ + get_random_bytes(&ctrl->serial, sizeof(ctrl->serial)); + kref_init(&ctrl->ref); ctrl->subsys = subsys; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 57dd6d834c28..76b6eedccaf9 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -113,6 +113,7 @@ struct nvmet_ctrl { struct mutex lock; u64 cap; + u64 serial; u32 cc; u32 csts; -- cgit v1.2.3 From 3ef1b4b298d98ccb3cc895baf1b18f7f9d073bee Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 4 Aug 2016 13:46:19 +0300 Subject: nvme-rdma: start async event handler after reconnecting to a controller When we reset or reconnect to a controller, we are cancelling the async event handler so we can safely re-establish resources, but we need to remember to start it again when we successfully reconnect. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index f4b836869d2c..e82434ab3a28 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -745,6 +745,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) if (ctrl->queue_count > 1) { nvme_start_queues(&ctrl->ctrl); nvme_queue_scan(&ctrl->ctrl); + nvme_queue_async_events(&ctrl->ctrl); } dev_info(ctrl->ctrl.device, "Successfully reconnected\n"); @@ -1747,6 +1748,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) if (ctrl->queue_count > 1) { nvme_start_queues(&ctrl->ctrl); nvme_queue_scan(&ctrl->ctrl); + nvme_queue_async_events(&ctrl->ctrl); } return; -- cgit v1.2.3 From e3266378bdbca82c2854fc612fa9a391eba1f173 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 4 Aug 2016 17:37:40 +0300 Subject: nvme-rdma: Remove unused includes Signed-off-by: Sagi Grimberg Reviewed-by: Steve Wise --- drivers/nvme/host/rdma.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index e82434ab3a28..8d2875b4c56d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -12,13 +12,11 @@ * more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include #include #include #include #include #include -#include #include #include #include @@ -26,7 +24,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 34b58355ad1d9987267f071265a7de6c8e00662a Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Fri, 5 Aug 2016 18:36:10 +0900 Subject: drm/ttm: Wait for a BO to become idle before unbinding it from GTT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes hangs under memory pressure, e.g. running the piglit test tex3d-maxsize concurrently with other tests. Fixes: 17d33bc9d6ef ("drm/ttm: drop waiting for idle in ttm_bo_evict.") Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 ++-- drivers/gpu/drm/nouveau/nouveau_bo.c | 4 ++-- drivers/gpu/drm/radeon/radeon_ttm.c | 4 ++-- drivers/gpu/drm/ttm/ttm_bo.c | 3 ++- drivers/gpu/drm/ttm/ttm_bo_util.c | 10 +++++++++- include/drm/ttm/ttm_bo_driver.h | 3 ++- 6 files changed, 19 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b7742e62972a..9b61c8ba7aaf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -335,7 +335,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, if (unlikely(r)) { goto out_cleanup; } - r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem); + r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem); out_cleanup: ttm_bo_mem_put(bo, &tmp_mem); return r; @@ -368,7 +368,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, if (unlikely(r)) { return r; } - r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem); + r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem); if (unlikely(r)) { goto out_cleanup; } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 528bdeffb339..6190035edfea 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1151,7 +1151,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr, if (ret) goto out; - ret = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem); + ret = ttm_bo_move_ttm(bo, true, intr, no_wait_gpu, new_mem); out: ttm_bo_mem_put(bo, &tmp_mem); return ret; @@ -1179,7 +1179,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr, if (ret) return ret; - ret = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem); + ret = ttm_bo_move_ttm(bo, true, intr, no_wait_gpu, &tmp_mem); if (ret) goto out; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index ffdad81ef964..0c00e192c845 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -346,7 +346,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo, if (unlikely(r)) { goto out_cleanup; } - r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem); + r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem); out_cleanup: ttm_bo_mem_put(bo, &tmp_mem); return r; @@ -379,7 +379,7 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo, if (unlikely(r)) { return r; } - r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem); + r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem); if (unlikely(r)) { goto out_cleanup; } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 4054d804fe06..42c074a9c955 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -354,7 +354,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) && !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) - ret = ttm_bo_move_ttm(bo, evict, no_wait_gpu, mem); + ret = ttm_bo_move_ttm(bo, evict, interruptible, no_wait_gpu, + mem); else if (bdev->driver->move) ret = bdev->driver->move(bo, evict, interruptible, no_wait_gpu, mem); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 2df602a35f92..f157a9efd220 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -45,7 +45,7 @@ void ttm_bo_free_old_node(struct ttm_buffer_object *bo) } int ttm_bo_move_ttm(struct ttm_buffer_object *bo, - bool evict, + bool evict, bool interruptible, bool no_wait_gpu, struct ttm_mem_reg *new_mem) { struct ttm_tt *ttm = bo->ttm; @@ -53,6 +53,14 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo, int ret; if (old_mem->mem_type != TTM_PL_SYSTEM) { + ret = ttm_bo_wait(bo, interruptible, no_wait_gpu); + + if (unlikely(ret != 0)) { + if (ret != -ERESTARTSYS) + pr_err("Failed to expire sync object before unbinding TTM\n"); + return ret; + } + ttm_tt_unbind(ttm); ttm_bo_free_old_node(bo); ttm_flag_masked(&old_mem->placement, TTM_PL_FLAG_SYSTEM, diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 4348d6d5877a..99c6d01d24f2 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -962,6 +962,7 @@ void ttm_mem_io_free(struct ttm_bo_device *bdev, * * @bo: A pointer to a struct ttm_buffer_object. * @evict: 1: This is an eviction. Don't try to pipeline. + * @interruptible: Sleep interruptible if waiting. * @no_wait_gpu: Return immediately if the GPU is busy. * @new_mem: struct ttm_mem_reg indicating where to move. * @@ -976,7 +977,7 @@ void ttm_mem_io_free(struct ttm_bo_device *bdev, */ extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo, - bool evict, bool no_wait_gpu, + bool evict, bool interruptible, bool no_wait_gpu, struct ttm_mem_reg *new_mem); /** -- cgit v1.2.3 From 8e85946777d3f667085f298c0acfec12fe105078 Mon Sep 17 00:00:00 2001 From: Akshay Adiga Date: Thu, 4 Aug 2016 20:59:17 +0530 Subject: cpufreq: powernv: Fix crash in gpstate_timer_handler() Commit 09ca4c9b5958 (cpufreq: powernv: Replacing pstate_id with frequency table index) changes calc_global_pstate() to use cpufreq_table index instead of pstate_id. But in gpstate_timer_handler(), pstate_id was being passed instead of cpufreq_table index, which caused index_to_pstate() to access out of bound indices, leading to this crash. Adding sanity check for index and pstate, to ensure only valid pstate and index values are returned. Call Trace: [c00000078d66b130] [c00000000011d224] __free_irq+0x234/0x360 (unreliable) [c00000078d66b1c0] [c00000000011d44c] free_irq+0x6c/0xa0 [c00000078d66b1f0] [c00000000006c4f8] opal_event_shutdown+0x88/0xd0 [c00000078d66b230] [c000000000067a4c] opal_shutdown+0x1c/0x90 [c00000078d66b260] [c000000000063a00] pnv_shutdown+0x20/0x40 [c00000078d66b280] [c000000000021538] machine_restart+0x38/0x90 [c0000000078d66b310] [c000000000965ea0] panic+0x284/0x300 [c00000078d66b3a0] [c00000000001f508] die+0x388/0x450 [c00000078d66b430] [c000000000045a50] bad_page_fault+0xd0/0x140 [c00000078d66b4a0] [c000000000008964] handle_page_fault+0x2c/0x30 interrupt: 300 at gpstate_timer_handler+0x150/0x260 LR = gpstate_timer_handler+0x130/0x260 [c00000078d66b7f0] [c000000000132b58] call_timer_fn+0x58/0x1c0 [c00000078d66b880] [c000000000132e20] expire_timers+0x130/0x1d0 [c00000078d66b8f0] [c000000000133068] run_timer_softirq+0x1a8/0x230 [c00000078d66b980] [c0000000000b535c] __do_softirq+0x18c/0x400 [c00000078d66ba70] [c0000000000b5828] irq_exit+0xc8/0x100 [c00000078d66ba90] [c00000000001e214] timer_interrupt+0xa4/0xe0 [c00000078d66bac0] [c0000000000027d0] decrementer_common+0x150/0x180 interrupt: 901 at arch_local_irq_restore+0x74/0x90 0] [c000000000106b34] call_cpuidle+0x44/0x90 [c00000078d66be50] [c00000000010708c] cpu_startup_entry+0x38c/0x460 [c00000078d66bf20] [c00000000003d930] start_secondary+0x330/0x380 [c00000078d66bf90] [c000000000008e6c] start_secondary_prolog+0x10/0x14 Fixes: 09ca4c9b5958 (cpufreq: powernv: Replacing pstate_id with frequency table index) Reported-by: Madhavan Srinivasan Signed-off-by: Akshay Adiga Acked-by: Viresh Kumar Tested-by: Andrew Donnellan Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 0f138b050e9a..2fcc879d2b97 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -145,11 +145,30 @@ static struct powernv_pstate_info { /* Use following macros for conversions between pstate_id and index */ static inline int idx_to_pstate(unsigned int i) { + if (unlikely(i >= powernv_pstate_info.nr_pstates)) { + pr_warn_once("index %u is out of bound\n", i); + return powernv_freqs[powernv_pstate_info.nominal].driver_data; + } + return powernv_freqs[i].driver_data; } static inline unsigned int pstate_to_idx(int pstate) { + int min = powernv_freqs[powernv_pstate_info.min].driver_data; + int max = powernv_freqs[powernv_pstate_info.max].driver_data; + + if (min > 0) { + if (unlikely((pstate < max) || (pstate > min))) { + pr_warn_once("pstate %d is out of bound\n", pstate); + return powernv_pstate_info.nominal; + } + } else { + if (unlikely((pstate > max) || (pstate < min))) { + pr_warn_once("pstate %d is out of bound\n", pstate); + return powernv_pstate_info.nominal; + } + } /* * abs() is deliberately used so that is works with * both monotonically increasing and decreasing @@ -594,7 +613,7 @@ void gpstate_timer_handler(unsigned long data) } else { gpstate_idx = calc_global_pstate(gpstates->elapsed_time, gpstates->highest_lpstate_idx, - freq_data.pstate_id); + gpstates->last_lpstate_idx); } /* -- cgit v1.2.3 From 176b1ec21306f0775e6d7a03f42d82a0b144ef0e Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 23 Jun 2016 15:02:46 -0700 Subject: thermal: intel_pch_thermal: Add suspend/resume callback Added suspend/resume callback to disable/enable PCH thermal sensor respectively. If the sensor is enabled by the BIOS, then the sensor status will not be changed during suspend/resume. Signed-off-by: Srinivas Pandruvada Signed-off-by: Zhang Rui --- drivers/thermal/intel_pch_thermal.c | 60 ++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/thermal/intel_pch_thermal.c b/drivers/thermal/intel_pch_thermal.c index 6a6ec1c95a7a..9b4815e81b0d 100644 --- a/drivers/thermal/intel_pch_thermal.c +++ b/drivers/thermal/intel_pch_thermal.c @@ -21,6 +21,7 @@ #include #include #include +#include /* Intel PCH thermal Device IDs */ #define PCH_THERMAL_DID_WPT 0x9CA4 /* Wildcat Point */ @@ -65,6 +66,7 @@ struct pch_thermal_device { unsigned long crt_temp; int hot_trip_id; unsigned long hot_temp; + bool bios_enabled; }; static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips) @@ -75,8 +77,10 @@ static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips) *nr_trips = 0; /* Check if BIOS has already enabled thermal sensor */ - if (WPT_TSS_TSDSS & readb(ptd->hw_base + WPT_TSS)) + if (WPT_TSS_TSDSS & readb(ptd->hw_base + WPT_TSS)) { + ptd->bios_enabled = true; goto read_trips; + } tsel = readb(ptd->hw_base + WPT_TSEL); /* @@ -130,9 +134,39 @@ static int pch_wpt_get_temp(struct pch_thermal_device *ptd, int *temp) return 0; } +static int pch_wpt_suspend(struct pch_thermal_device *ptd) +{ + u8 tsel; + + if (ptd->bios_enabled) + return 0; + + tsel = readb(ptd->hw_base + WPT_TSEL); + + writeb(tsel & 0xFE, ptd->hw_base + WPT_TSEL); + + return 0; +} + +static int pch_wpt_resume(struct pch_thermal_device *ptd) +{ + u8 tsel; + + if (ptd->bios_enabled) + return 0; + + tsel = readb(ptd->hw_base + WPT_TSEL); + + writeb(tsel | WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL); + + return 0; +} + struct pch_dev_ops { int (*hw_init)(struct pch_thermal_device *ptd, int *nr_trips); int (*get_temp)(struct pch_thermal_device *ptd, int *temp); + int (*suspend)(struct pch_thermal_device *ptd); + int (*resume)(struct pch_thermal_device *ptd); }; @@ -140,6 +174,8 @@ struct pch_dev_ops { static const struct pch_dev_ops pch_dev_ops_wpt = { .hw_init = pch_wpt_init, .get_temp = pch_wpt_get_temp, + .suspend = pch_wpt_suspend, + .resume = pch_wpt_resume, }; static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp) @@ -269,6 +305,22 @@ static void intel_pch_thermal_remove(struct pci_dev *pdev) pci_disable_device(pdev); } +static int intel_pch_thermal_suspend(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct pch_thermal_device *ptd = pci_get_drvdata(pdev); + + return ptd->ops->suspend(ptd); +} + +static int intel_pch_thermal_resume(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct pch_thermal_device *ptd = pci_get_drvdata(pdev); + + return ptd->ops->resume(ptd); +} + static struct pci_device_id intel_pch_thermal_id[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL) }, @@ -276,11 +328,17 @@ static struct pci_device_id intel_pch_thermal_id[] = { }; MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id); +static const struct dev_pm_ops intel_pch_pm_ops = { + .suspend = intel_pch_thermal_suspend, + .resume = intel_pch_thermal_resume, +}; + static struct pci_driver intel_pch_thermal_driver = { .name = "intel_pch_thermal", .id_table = intel_pch_thermal_id, .probe = intel_pch_thermal_probe, .remove = intel_pch_thermal_remove, + .driver.pm = &intel_pch_pm_ops, }; module_pci_driver(intel_pch_thermal_driver); -- cgit v1.2.3 From 70c50ee72eebb3d1e3aae7450269fd8d8074c610 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 5 Aug 2016 15:20:41 +0200 Subject: thermal/powerclamp: Prevent division by zero when counting interval I have got a zero division error when disabling the forced idle injection from the intel powerclamp. I did echo 0 >/sys/class/thermal/cooling_device48/cur_state and got [ 986.072632] divide error: 0000 [#1] PREEMPT SMP [ 986.078989] Modules linked in: [ 986.083618] CPU: 17 PID: 24967 Comm: kidle_inject/17 Not tainted 4.7.0-1-default+ #3055 [ 986.093781] Hardware name: Intel Corporation S2600CP/S2600CP, BIOS RMLSDP.86I.R3.27.D685.1305151734 05/15/2013 [ 986.106227] task: ffff880430e1c080 task.stack: ffff880427ef0000 [ 986.114122] RIP: 0010:[] [] clamp_thread+0x1d9/0x600 [ 986.124609] RSP: 0018:ffff880427ef3e20 EFLAGS: 00010246 [ 986.131860] RAX: 0000000000000258 RBX: 0000000000000006 RCX: 0000000000000001 [ 986.141179] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000018 [ 986.150478] RBP: ffff880427ef3ec8 R08: ffff880427ef0000 R09: 0000000000000002 [ 986.159779] R10: 0000000000003df2 R11: 0000000000000018 R12: 0000000000000002 [ 986.169089] R13: 0000000000000000 R14: ffff880427ef0000 R15: ffff880427ef0000 [ 986.178388] FS: 0000000000000000(0000) GS:ffff880435940000(0000) knlGS:0000000000000000 [ 986.188785] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 986.196559] CR2: 00007f1d0caf0000 CR3: 0000000002006000 CR4: 00000000001406e0 [ 986.205909] Stack: [ 986.209524] ffff8802be897b00 ffff880430e1c080 0000000000000011 0000006a35959780 [ 986.219236] 0000000000000011 ffff880427ef0008 0000000000000000 ffff8804359503d0 [ 986.228966] 0000000100029d93 ffffffff81794140 0000000000000000 ffffffff05000011 [ 986.238686] Call Trace: [ 986.242825] [] ? pkg_state_counter+0x80/0x80 [ 986.250866] [] ? powerclamp_set_cur_state+0x180/0x180 [ 986.259797] [] kthread+0xc9/0xe0 [ 986.266682] [] ret_from_fork+0x1f/0x40 [ 986.274142] [] ? kthread_create_on_node+0x180/0x180 [ 986.282869] Code: d1 ea 48 89 d6 80 3d 6a d0 d4 00 00 ba 64 00 00 00 89 d8 41 0f 45 f5 0f af c2 42 8d 14 2e be 31 00 00 00 83 fa 31 0f 42 f2 31 d2 f6 48 8b 15 9e 07 87 00 48 8b 3d 97 07 87 00 48 63 f0 83 e8 [ 986.307806] RIP [] clamp_thread+0x1d9/0x600 [ 986.315871] RSP RIP points to the following lines: compensation = get_compensation(target_ratio); interval = duration_jiffies*100/(target_ratio+compensation); A solution would be to switch the following two commands in powerclamp_set_cur_state(): set_target_ratio = 0; end_power_clamp(); But I think that the zero division might happen also when target_ratio is non-zero because the compensation might be negative. Therefore we also check the sum of target_ratio and compensation explicitly. Also the compensated_ratio variable is always set. Therefore there is no need to initialize it. Signed-off-by: Petr Mladek Acked-by: Jacob Pan Signed-off-by: Zhang Rui --- drivers/thermal/intel_powerclamp.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index 015ce2eb6eb7..0e4dc0afcfd2 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -388,7 +388,7 @@ static int clamp_thread(void *arg) int sleeptime; unsigned long target_jiffies; unsigned int guard; - unsigned int compensation = 0; + unsigned int compensated_ratio; int interval; /* jiffies to sleep for each attempt */ unsigned int duration_jiffies = msecs_to_jiffies(duration); unsigned int window_size_now; @@ -409,8 +409,11 @@ static int clamp_thread(void *arg) * c-states, thus we need to compensate the injected idle ratio * to achieve the actual target reported by the HW. */ - compensation = get_compensation(target_ratio); - interval = duration_jiffies*100/(target_ratio+compensation); + compensated_ratio = target_ratio + + get_compensation(target_ratio); + if (compensated_ratio <= 0) + compensated_ratio = 1; + interval = duration_jiffies * 100 / compensated_ratio; /* align idle time */ target_jiffies = roundup(jiffies, interval); @@ -647,8 +650,8 @@ static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev, goto exit_set; } else if (set_target_ratio > 0 && new_target_ratio == 0) { pr_info("Stop forced idle injection\n"); - set_target_ratio = 0; end_power_clamp(); + set_target_ratio = 0; } else /* adjust currently running */ { set_target_ratio = new_target_ratio; /* make new set_target_ratio visible to other cpus */ -- cgit v1.2.3 From d0b7306d203c82e7c04d6eb066ca4898f016ebdd Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Thu, 2 Jun 2016 15:25:31 +0100 Subject: thermal: fix race condition when updating cooling device When multiple thermal zones are bound to the same cooling device, multiple kernel threads may want to update the cooling device state by calling thermal_cdev_update(). Having cdev not protected by a mutex can lead to a race condition. Consider the following situation with two kernel threads k1 and k2: Thread k1 Thread k2 || || call thermal_cdev_update() || ... || set_cur_state(cdev, target); call power_actor_set_power() || ... || instance->target = state; || cdev->updated = false; || || cdev->updated = true; || // completes execution call thermal_cdev_update() || // cdev->updated == true || return; || \/ time k2 has already looped through the thermal instances looking for the deepest cooling device state and is preempted right before setting cdev->updated to true. Now, k1 runs, modifies the thermal instance state and sets cdev->updated to false. Then, k1 is preempted and k2 continues the execution by setting cdev->updated to true, therefore preventing k1 from performing the update. Notice that this is not an issue if k2 looks at the instance->target modified by k1 "after" it is assigned by k1. In fact, in this case the update will happen anyway and k1 can safely return immediately from thermal_cdev_update(). This may lead to a situation where a thermal governor never updates the cooling device. For example, this is the case for the step_wise governor: when calling the function thermal_zone_trip_update(), the governor may always get a new state equal to the old one (which, however, wasn't notified to the cooling device) and will therefore skip the update. CC: Zhang Rui CC: Eduardo Valentin CC: Peter Feuerer Reported-by: Toby Huang Signed-off-by: Michele Di Giorgio Reviewed-by: Javi Merino Signed-off-by: Zhang Rui --- drivers/thermal/fair_share.c | 2 ++ drivers/thermal/gov_bang_bang.c | 2 ++ drivers/thermal/power_allocator.c | 2 ++ drivers/thermal/step_wise.c | 2 ++ drivers/thermal/thermal_core.c | 10 +++++++--- 5 files changed, 15 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c index 34fe36504a55..68bd1b569118 100644 --- a/drivers/thermal/fair_share.c +++ b/drivers/thermal/fair_share.c @@ -116,7 +116,9 @@ static int fair_share_throttle(struct thermal_zone_device *tz, int trip) instance->target = get_target_state(tz, cdev, percentage, cur_trip_level); + mutex_lock(&instance->cdev->lock); instance->cdev->updated = false; + mutex_unlock(&instance->cdev->lock); thermal_cdev_update(cdev); } return 0; diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c index fc52016d4e85..bb118a152cbb 100644 --- a/drivers/thermal/gov_bang_bang.c +++ b/drivers/thermal/gov_bang_bang.c @@ -71,7 +71,9 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) dev_dbg(&instance->cdev->device, "target=%d\n", (int)instance->target); + mutex_lock(&instance->cdev->lock); instance->cdev->updated = false; /* cdev needs update */ + mutex_unlock(&instance->cdev->lock); } mutex_unlock(&tz->lock); diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c index 2f1a863a8e15..b4d3116cfdaf 100644 --- a/drivers/thermal/power_allocator.c +++ b/drivers/thermal/power_allocator.c @@ -529,7 +529,9 @@ static void allow_maximum_power(struct thermal_zone_device *tz) continue; instance->target = 0; + mutex_lock(&instance->cdev->lock); instance->cdev->updated = false; + mutex_unlock(&instance->cdev->lock); thermal_cdev_update(instance->cdev); } } diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index ea9366ad3e6b..bcef2e7c4ec9 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -175,7 +175,9 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) update_passive_instance(tz, trip_type, -1); instance->initialized = true; + mutex_lock(&instance->cdev->lock); instance->cdev->updated = false; /* cdev needs update */ + mutex_unlock(&instance->cdev->lock); } mutex_unlock(&tz->lock); diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 5133cd1e10b7..e2fc6161dded 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1093,7 +1093,9 @@ int power_actor_set_power(struct thermal_cooling_device *cdev, return ret; instance->target = state; + mutex_lock(&cdev->lock); cdev->updated = false; + mutex_unlock(&cdev->lock); thermal_cdev_update(cdev); return 0; @@ -1623,11 +1625,13 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev) struct thermal_instance *instance; unsigned long target = 0; + mutex_lock(&cdev->lock); /* cooling device is updated*/ - if (cdev->updated) + if (cdev->updated) { + mutex_unlock(&cdev->lock); return; + } - mutex_lock(&cdev->lock); /* Make sure cdev enters the deepest cooling state */ list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) { dev_dbg(&cdev->device, "zone%d->target=%lu\n", @@ -1637,9 +1641,9 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev) if (instance->target > target) target = instance->target; } - mutex_unlock(&cdev->lock); cdev->ops->set_cur_state(cdev, target); cdev->updated = true; + mutex_unlock(&cdev->lock); trace_cdev_update(cdev, target); dev_dbg(&cdev->device, "set to state %lu\n", target); } -- cgit v1.2.3 From f4c592439b144ef1de66eac764140643a54099eb Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 4 Jul 2016 07:19:32 +0000 Subject: thermal: hwmon: EXPORT_SYMBOL_GPL for thermal hwmon sysfs thermal_add_hwmon_sysfs()/thermal_remove_hwmon_sysfs() need EXPORT_SYMBOL_GPL(). Otherwise we will have ERROR >> ERROR: "thermal_remove_hwmon_sysfs" [drivers/thermal/rcar_thermal.ko] undefined! >> ERROR: "thermal_add_hwmon_sysfs" [drivers/thermal/rcar_thermal.ko] undefined! Signed-off-by: Kuninori Morimoto Signed-off-by: Zhang Rui --- drivers/thermal/thermal_hwmon.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c index 06fd2ed9ef9d..c41c7742903a 100644 --- a/drivers/thermal/thermal_hwmon.c +++ b/drivers/thermal/thermal_hwmon.c @@ -232,6 +232,7 @@ int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) return result; } +EXPORT_SYMBOL_GPL(thermal_add_hwmon_sysfs); void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) { @@ -270,3 +271,4 @@ void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) hwmon_device_unregister(hwmon->device); kfree(hwmon); } +EXPORT_SYMBOL_GPL(thermal_remove_hwmon_sysfs); -- cgit v1.2.3 From 165989a5b667b90589f21c8affe496ad21f08591 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 25 Jul 2016 07:01:19 +0000 Subject: thermal: clock_cooling: Fix missing mutex_init() The driver allocates the mutex but not initialize it. Use mutex_init() on it to initialize it correctly. This is detected by Coccinelle semantic patch. Signed-off-by: Wei Yongjun Signed-off-by: Zhang Rui --- drivers/thermal/clock_cooling.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/thermal/clock_cooling.c b/drivers/thermal/clock_cooling.c index 1b4ff0f4c716..ed5dd0e88657 100644 --- a/drivers/thermal/clock_cooling.c +++ b/drivers/thermal/clock_cooling.c @@ -426,6 +426,7 @@ clock_cooling_register(struct device *dev, const char *clock_name) if (!ccdev) return ERR_PTR(-ENOMEM); + mutex_init(&ccdev->lock); ccdev->dev = dev; ccdev->clk = devm_clk_get(dev, clock_name); if (IS_ERR(ccdev->clk)) -- cgit v1.2.3 From 29986cc8a66fcc4b8ea8a01216a72f06b865360b Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 3 Aug 2016 23:31:36 +0300 Subject: drm: rcar-du: Link HDMI encoder with bridge The conversion of the rcar-du driver from the I2C slave encoder to the DRM bridge API left the HDMI encoder's bridge pointer NULL, preventing the bridge from being handled automatically by the DRM core. Fix it. Fixes: 1d926114d8f4 ("drm: rcar-du: Remove i2c slave encoder interface for hdmi encoder") Signed-off-by: Laurent Pinchart Signed-off-by: Dave Airlie --- drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c index 4de3ff0dbebd..e03004f4588d 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c @@ -125,6 +125,7 @@ int rcar_du_hdmienc_init(struct rcar_du_device *rcdu, /* Link drm_bridge to encoder */ bridge->encoder = encoder; + encoder->bridge = bridge; ret = drm_bridge_attach(rcdu->ddev, bridge); if (ret) { -- cgit v1.2.3 From 5c6c201ccbaf9d3901f829441d457293f7ca8ef4 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 4 Aug 2016 18:35:48 +0200 Subject: drm: Paper over locking inversion after registration rework drm_connector_register_all requires a few too many locks because our connector_list locking is busted. Add another FIXME+hack to work around this. This should address the below lockdep splat: ====================================================== [ INFO: possible circular locking dependency detected ] 4.7.0-rc5+ #524 Tainted: G O ------------------------------------------------------- kworker/u8:0/6 is trying to acquire lock: (&dev->mode_config.mutex){+.+.+.}, at: [] drm_modeset_lock_all+0x40/0x120 but task is already holding lock: ((fb_notifier_list).rwsem){++++.+}, at: [] __blocking_notifier_call_chain+0x35/0x70 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 ((fb_notifier_list).rwsem){++++.+}: [] lock_acquire+0xb1/0x200 [] down_write+0x44/0x80 [] blocking_notifier_chain_register+0x21/0xb0 [] fb_register_client+0x18/0x20 [] backlight_device_register+0x136/0x260 [] intel_backlight_device_register+0xa2/0x160 [i915] [] intel_connector_register+0xe/0x10 [i915] [] intel_dp_connector_register+0x1b/0x80 [i915] [] drm_connector_register+0x4a/0x80 [] drm_connector_register_all+0x64/0xf0 [] drm_modeset_register_all+0x174/0x1c0 [] drm_dev_register+0xc2/0xd0 [] i915_driver_load+0x1547/0x2200 [i915] [] i915_pci_probe+0x4f/0x70 [i915] [] local_pci_probe+0x45/0xa0 [] pci_device_probe+0xdb/0x130 [] driver_probe_device+0x223/0x440 [] __driver_attach+0xd5/0x100 [] bus_for_each_dev+0x66/0xa0 [] driver_attach+0x1e/0x20 [] bus_add_driver+0x1ee/0x280 [] driver_register+0x60/0xe0 [] __pci_register_driver+0x60/0x70 [] i915_init+0x5b/0x62 [i915] [] do_one_initcall+0x3d/0x150 [] do_init_module+0x5f/0x1d9 [] load_module+0x20e6/0x27e0 [] SYSC_finit_module+0xc3/0xf0 [] SyS_finit_module+0xe/0x10 [] entry_SYSCALL_64_fastpath+0x1c/0xac -> #0 (&dev->mode_config.mutex){+.+.+.}: [] __lock_acquire+0x10fc/0x1260 [] lock_acquire+0xb1/0x200 [] mutex_lock_nested+0x67/0x3c0 [] drm_modeset_lock_all+0x40/0x120 [] drm_fb_helper_restore_fbdev_mode_unlocked+0x2b/0x80 [] drm_fb_helper_set_par+0x2d/0x50 [] intel_fbdev_set_par+0x1a/0x60 [i915] [] fbcon_init+0x586/0x610 [] visual_init+0xca/0x130 [] do_bind_con_driver+0x1c1/0x3a0 [] do_take_over_console+0x116/0x180 [] do_fbcon_takeover+0x57/0xb0 [] fbcon_event_notify+0x658/0x750 [] notifier_call_chain+0x3e/0xb0 [] __blocking_notifier_call_chain+0x4d/0x70 [] blocking_notifier_call_chain+0x16/0x20 [] fb_notifier_call_chain+0x1b/0x20 [] register_framebuffer+0x251/0x330 [] drm_fb_helper_initial_config+0x25f/0x3f0 [] intel_fbdev_initial_config+0x18/0x30 [i915] [] async_run_entry_fn+0x48/0x150 [] process_one_work+0x1e7/0x750 [] worker_thread+0x4b/0x4f0 [] kthread+0xef/0x110 [] ret_from_fork+0x1f/0x40 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock((fb_notifier_list).rwsem); lock(&dev->mode_config.mutex); lock((fb_notifier_list).rwsem); lock(&dev->mode_config.mutex); *** DEADLOCK *** 6 locks held by kworker/u8:0/6: #0: ("events_unbound"){.+.+.+}, at: [] process_one_work+0x169/0x750 #1: ((&entry->work)){+.+.+.}, at: [] process_one_work+0x169/0x750 #2: (registration_lock){+.+.+.}, at: [] register_framebuffer+0x27/0x330 #3: (console_lock){+.+.+.}, at: [] register_framebuffer+0x26e/0x330 #4: (&fb_info->lock){+.+.+.}, at: [] lock_fb_info+0x1d/0x40 #5: ((fb_notifier_list).rwsem){++++.+}, at: [] __blocking_notifier_call_chain+0x35/0x70 stack backtrace: CPU: 2 PID: 6 Comm: kworker/u8:0 Tainted: G O 4.7.0-rc5+ #524 Hardware name: Intel Corp. Broxton P/NOTEBOOK, BIOS APLKRVPA.X64.0138.B33.1606250842 06/25/2016 Workqueue: events_unbound async_run_entry_fn 0000000000000000 ffff8800758577f0 ffffffff814507a5 ffffffff828b9900 ffffffff828b9900 ffff880075857830 ffffffff810dc6fa ffff880075857880 ffff88007584d688 0000000000000005 0000000000000006 ffff88007584d6b0 Call Trace: [] dump_stack+0x67/0x92 [] print_circular_bug+0x1aa/0x200 [] __lock_acquire+0x10fc/0x1260 [] lock_acquire+0xb1/0x200 [] ? drm_modeset_lock_all+0x40/0x120 [] ? drm_modeset_lock_all+0x40/0x120 [] mutex_lock_nested+0x67/0x3c0 [] ? drm_modeset_lock_all+0x40/0x120 [] ? rcu_read_lock_sched_held+0x7f/0x90 [] ? kmem_cache_alloc_trace+0x248/0x2b0 [] ? drm_modeset_lock_all+0x25/0x120 [] drm_modeset_lock_all+0x40/0x120 [] drm_fb_helper_restore_fbdev_mode_unlocked+0x2b/0x80 [] drm_fb_helper_set_par+0x2d/0x50 [] intel_fbdev_set_par+0x1a/0x60 [i915] [] fbcon_init+0x586/0x610 [] visual_init+0xca/0x130 [] do_bind_con_driver+0x1c1/0x3a0 [] do_take_over_console+0x116/0x180 [] do_fbcon_takeover+0x57/0xb0 [] fbcon_event_notify+0x658/0x750 [] notifier_call_chain+0x3e/0xb0 [] __blocking_notifier_call_chain+0x4d/0x70 [] blocking_notifier_call_chain+0x16/0x20 [] fb_notifier_call_chain+0x1b/0x20 [] register_framebuffer+0x251/0x330 [] ? vga_switcheroo_client_fb_set+0x5d/0x70 [] drm_fb_helper_initial_config+0x25f/0x3f0 [] intel_fbdev_initial_config+0x18/0x30 [i915] [] async_run_entry_fn+0x48/0x150 [] process_one_work+0x1e7/0x750 [] ? process_one_work+0x169/0x750 [] worker_thread+0x4b/0x4f0 [] ? process_one_work+0x750/0x750 [] kthread+0xef/0x110 [] ret_from_fork+0x1f/0x40 [] ? kthread_stop+0x2e0/0x2e0 v2: Rebase onto the right branch (hand-editing patches ftw) and add more reporters. Reported-by: Imre Deak Cc: Imre Deak Cc: Chris Wilson Acked-by: Chris Wilson Reported-by: Jiri Kosina Cc: Jiri Kosina Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index f1d9f0569d7f..b1dbb60af99f 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -1121,16 +1121,14 @@ static int drm_connector_register_all(struct drm_device *dev) struct drm_connector *connector; int ret; - mutex_lock(&dev->mode_config.mutex); - - drm_for_each_connector(connector, dev) { + /* FIXME: taking the mode config mutex ends up in a clash with + * fbcon/backlight registration */ + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { ret = drm_connector_register(connector); if (ret) goto err; } - mutex_unlock(&dev->mode_config.mutex); - return 0; err: -- cgit v1.2.3 From 68202c9f0ad6e16ee806fbadbc5838d55fe5aa5c Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 29 Jul 2016 14:59:12 -0600 Subject: libnvdimm, nd_blk: mask off reserved status bits The "NVDIMM Block Window Driver Writer's Guide": http://pmem.io/documents/NVDIMM_DriverWritersGuide-July-2016.pdf ...defines the layout of the block window status register. For the July 2016 version of the spec linked to above, this happens in Figure 4 on page 26. The only bits defined in this spec are bits 31, 5, 4, 2, 1 and 0. The rest of the bits in the status register are reserved, and there is a warning following the diagram that says: Note: The driver cannot assume the value of the RESERVED bits in the status register are zero. These reserved bits need to be masked off, and the driver must avoid checking the state of those bits. This change ensures that for hardware implementations that set these reserved bits in the status register, the driver won't incorrectly fail the block I/Os. Cc: #v4.2+ Reviewed-by: Lee, Chun-Yi Signed-off-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 8c234dd9b8bc..80cc7c089a15 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1527,11 +1527,12 @@ static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw) { struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR]; u64 offset = nfit_blk->stat_offset + mmio->size * bw; + const u32 STATUS_MASK = 0x80000037; if (mmio->num_lines) offset = to_interleave_offset(offset, mmio); - return readl(mmio->addr.base + offset); + return readl(mmio->addr.base + offset) & STATUS_MASK; } static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, -- cgit v1.2.3 From abe8b4e3cef88b8202641d63f5ad58141b970b0f Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 27 Jul 2016 16:38:59 -0600 Subject: nvdimm, btt: add a size attribute for BTTs To be consistent with other namespaces, expose a 'size' attribute for BTT devices also. Cc: Dan Williams Reported-by: Linda Knippers Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/btt.c | 1 + drivers/nvdimm/btt_devs.c | 20 ++++++++++++++++++++ drivers/nvdimm/nd.h | 1 + 3 files changed, 22 insertions(+) (limited to 'drivers') diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 88e91666f145..368795aad5c9 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1269,6 +1269,7 @@ static int btt_blk_init(struct btt *btt) } } set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); + btt->nd_btt->size = btt->nlba * (u64)btt->sector_size; revalidate_disk(btt->btt_disk); return 0; diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 3fa7919f94a8..97dd2925ed6e 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -140,10 +140,30 @@ static ssize_t namespace_store(struct device *dev, } static DEVICE_ATTR_RW(namespace); +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + ssize_t rc; + + device_lock(dev); + if (dev->driver) + rc = sprintf(buf, "%llu\n", nd_btt->size); + else { + /* no size to convey if the btt instance is disabled */ + rc = -ENXIO; + } + device_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(size); + static struct attribute *nd_btt_attributes[] = { &dev_attr_sector_size.attr, &dev_attr_namespace.attr, &dev_attr_uuid.attr, + &dev_attr_size.attr, NULL, }; diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 40476399d227..8024a0ef86d3 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -143,6 +143,7 @@ struct nd_btt { struct nd_namespace_common *ndns; struct btt *btt; unsigned long lbasize; + u64 size; u8 *uuid; int id; }; -- cgit v1.2.3 From 65a97a67a7d7b7a953492573d5740d32510b9daa Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 27 Jul 2016 09:34:42 +0200 Subject: dell-wmi: Ignore WMI event 0xe00e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WMI event 0xe00e is received when battery was removed or inserted. Signed-off-by: Pali Rohár Signed-off-by: Darren Hart --- drivers/platform/x86/dell-wmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index d2bc092defd7..da2fe18162e1 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -110,8 +110,8 @@ static const struct key_entry dell_wmi_keymap_type_0000[] __initconst = { /* BIOS error detected */ { KE_IGNORE, 0xe00d, { KEY_RESERVED } }, - /* Unknown, defined in ACPI DSDT */ - /* { KE_IGNORE, 0xe00e, { KEY_RESERVED } }, */ + /* Battery was removed or inserted */ + { KE_IGNORE, 0xe00e, { KEY_RESERVED } }, /* Wifi Catcher */ { KE_KEY, 0xe011, { KEY_PROG2 } }, -- cgit v1.2.3 From 6b6dddbe11b13bb00e0f9a1af2021e266811be85 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 5 Aug 2016 16:15:38 +0200 Subject: rbd: destroy header_oloc in rbd_dev_release() Purely cosmetic at this point, as rbd doesn't use RADOS namespaces and hence rbd_dev->header_oloc->pool_ns is always NULL. Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 1a04af6d2421..07668a6f0607 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3950,6 +3950,7 @@ static void rbd_dev_release(struct device *dev) bool need_put = !!rbd_dev->opts; ceph_oid_destroy(&rbd_dev->header_oid); + ceph_oloc_destroy(&rbd_dev->header_oloc); rbd_put_client(rbd_dev->rbd_client); rbd_spec_put(rbd_dev->spec); -- cgit v1.2.3 From c8952a707556e04374d7b2fdb3a079d63ddf6f2f Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 8 Aug 2016 16:16:23 -0600 Subject: vfio/pci: Fix NULL pointer oops in error interrupt setup handling There are multiple cases in vfio_pci_set_ctx_trigger_single() where we assume we can safely read from our data pointer without actually checking whether the user has passed any data via the count field. VFIO_IRQ_SET_DATA_NONE in particular is entirely broken since we attempt to pull an int32_t file descriptor out before even checking the data type. The other data types assume the data pointer contains one element of their type as well. In part this is good news because we were previously restricted from doing much sanitization of parameters because it was missed in the past and we didn't want to break existing users. Clearly DATA_NONE is completely broken, so it must not have any users and we can fix it up completely. For DATA_BOOL and DATA_EVENTFD, we'll just protect ourselves, returning error when count is zero since we previously would have oopsed. Signed-off-by: Alex Williamson Reported-by: Chris Thompson Cc: stable@vger.kernel.org Reviewed-by: Eric Auger --- drivers/vfio/pci/vfio_pci_intrs.c | 85 ++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 36 deletions(-) (limited to 'drivers') diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 15ecfc9c5f6c..152b43822ef1 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -564,67 +564,80 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev, } static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx, - uint32_t flags, void *data) + unsigned int count, uint32_t flags, + void *data) { - int32_t fd = *(int32_t *)data; - - if (!(flags & VFIO_IRQ_SET_DATA_TYPE_MASK)) - return -EINVAL; - /* DATA_NONE/DATA_BOOL enables loopback testing */ if (flags & VFIO_IRQ_SET_DATA_NONE) { - if (*ctx) - eventfd_signal(*ctx, 1); - return 0; + if (*ctx) { + if (count) { + eventfd_signal(*ctx, 1); + } else { + eventfd_ctx_put(*ctx); + *ctx = NULL; + } + return 0; + } } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { - uint8_t trigger = *(uint8_t *)data; + uint8_t trigger; + + if (!count) + return -EINVAL; + + trigger = *(uint8_t *)data; if (trigger && *ctx) eventfd_signal(*ctx, 1); - return 0; - } - /* Handle SET_DATA_EVENTFD */ - if (fd == -1) { - if (*ctx) - eventfd_ctx_put(*ctx); - *ctx = NULL; return 0; - } else if (fd >= 0) { - struct eventfd_ctx *efdctx; - efdctx = eventfd_ctx_fdget(fd); - if (IS_ERR(efdctx)) - return PTR_ERR(efdctx); - if (*ctx) - eventfd_ctx_put(*ctx); - *ctx = efdctx; + } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + int32_t fd; + + if (!count) + return -EINVAL; + + fd = *(int32_t *)data; + if (fd == -1) { + if (*ctx) + eventfd_ctx_put(*ctx); + *ctx = NULL; + } else if (fd >= 0) { + struct eventfd_ctx *efdctx; + + efdctx = eventfd_ctx_fdget(fd); + if (IS_ERR(efdctx)) + return PTR_ERR(efdctx); + + if (*ctx) + eventfd_ctx_put(*ctx); + + *ctx = efdctx; + } return 0; - } else - return -EINVAL; + } + + return -EINVAL; } static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) { - if (index != VFIO_PCI_ERR_IRQ_INDEX) + if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1) return -EINVAL; - /* - * We should sanitize start & count, but that wasn't caught - * originally, so this IRQ index must forever ignore them :-( - */ - - return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger, flags, data); + return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger, + count, flags, data); } static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) { - if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count != 1) + if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1) return -EINVAL; - return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger, flags, data); + return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger, + count, flags, data); } int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags, -- cgit v1.2.3 From e10aec652f31ec61d6a0b4d00d8ef8d2b66fa0fd Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Wed, 6 Jul 2016 12:05:44 +0200 Subject: drm/edid: Add 6 bpc quirk for display AEO model 0. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bugzilla https://bugzilla.kernel.org/show_bug.cgi?id=105331 reports that the "AEO model 0" display is driven with 8 bpc without dithering by default, which looks bad because that panel is apparently a 6 bpc DP panel with faulty EDID. A fix for this was made by commit 013dd9e03872 ("drm/i915/dp: fall back to 18 bpp when sink capability is unknown"). That commit triggers new regressions in precision for DP->DVI and DP->VGA displays. A patch is out to revert that commit, but it will revert video output for the AEO model 0 panel to 8 bpc without dithering. The EDID 1.3 of that panel, as decoded from the xrandr output attached to that bugzilla bug report, is somewhat faulty, and beyond other problems also sets the "DFP 1.x compliant TMDS" bit, which according to DFP spec means to drive the panel with 8 bpc and no dithering in absence of other colorimetry information. Try to make the original bug reporter happy despite the faulty EDID by adding a quirk to mark that panel as 6 bpc, so 6 bpc output with dithering creates a nice picture. Tested by injecting the edid from the fdo bug into a DP connector via drm_kms_helper.edid_firmware and verifying the 6 bpc + dithering is selected. This patch should be backported to stable. Signed-off-by: Mario Kleiner Cc: stable@vger.kernel.org Cc: Jani Nikula Cc: Ville Syrjälä Cc: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_edid.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 7df26d4b7ad8..2cb472b9976a 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -74,6 +74,8 @@ #define EDID_QUIRK_FORCE_8BPC (1 << 8) /* Force 12bpc */ #define EDID_QUIRK_FORCE_12BPC (1 << 9) +/* Force 6bpc */ +#define EDID_QUIRK_FORCE_6BPC (1 << 10) struct detailed_mode_closure { struct drm_connector *connector; @@ -100,6 +102,9 @@ static struct edid_quirk { /* Unknown Acer */ { "ACR", 2423, EDID_QUIRK_FIRST_DETAILED_PREFERRED }, + /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */ + { "AEO", 0, EDID_QUIRK_FORCE_6BPC }, + /* Belinea 10 15 55 */ { "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 }, { "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 }, @@ -4082,6 +4087,9 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid) drm_add_display_info(edid, &connector->display_info, connector); + if (quirks & EDID_QUIRK_FORCE_6BPC) + connector->display_info.bpc = 6; + if (quirks & EDID_QUIRK_FORCE_8BPC) connector->display_info.bpc = 8; -- cgit v1.2.3 From 196f954e250943df414efd3d632254c29be38e59 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Wed, 6 Jul 2016 12:05:45 +0200 Subject: drm/i915/dp: Revert "drm/i915/dp: fall back to 18 bpp when sink capability is unknown" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 013dd9e03872 ("drm/i915/dp: fall back to 18 bpp when sink capability is unknown") This commit introduced a regression into stable kernels, as it reduces output color depth to 6 bpc for any video sink connected to a Displayport connector if that sink doesn't report a specific color depth via EDID, or if our EDID parser doesn't actually recognize the proper bpc from EDID. Affected are active DisplayPort->VGA converters and active DisplayPort->DVI converters. Both should be able to handle 8 bpc, but are degraded to 6 bpc with this patch. The reverted commit was meant to fix Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=105331 A followup patch implements a fix for that specific bug, which is caused by a faulty EDID of the affected DP panel by adding a new EDID quirk for that panel. DP 18 bpp fallback handling and other improvements to DP sink bpc detection will be handled for future kernels in a separate series of patches. Please backport to stable. Signed-off-by: Mario Kleiner Acked-by: Jani Nikula Cc: stable@vger.kernel.org Cc: Ville Syrjälä Cc: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_display.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index baeb75388dbe..dcf93b3d4fb6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12106,21 +12106,11 @@ connected_sink_compute_bpp(struct intel_connector *connector, pipe_config->pipe_bpp = connector->base.display_info.bpc*3; } - /* Clamp bpp to default limit on screens without EDID 1.4 */ - if (connector->base.display_info.bpc == 0) { - int type = connector->base.connector_type; - int clamp_bpp = 24; - - /* Fall back to 18 bpp when DP sink capability is unknown. */ - if (type == DRM_MODE_CONNECTOR_DisplayPort || - type == DRM_MODE_CONNECTOR_eDP) - clamp_bpp = 18; - - if (bpp > clamp_bpp) { - DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of %d\n", - bpp, clamp_bpp); - pipe_config->pipe_bpp = clamp_bpp; - } + /* Clamp bpp to 8 on screens without EDID 1.4 */ + if (connector->base.display_info.bpc == 0 && bpp > 24) { + DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of 24\n", + bpp); + pipe_config->pipe_bpp = 24; } } -- cgit v1.2.3 From 210a021dab639694600450c14b877bf3e3240adc Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Wed, 6 Jul 2016 12:05:48 +0200 Subject: drm/edid: Set 8 bpc color depth for displays with "DFP 1.x compliant TMDS". MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to E-EDID spec 1.3, table 3.9, a digital video sink with the "DFP 1.x compliant TMDS" bit set is "signal compatible with VESA DFP 1.x TMDS CRGB, 1 pixel / clock, up to 8 bits / color MSB aligned". For such displays, the DFP spec 1.0, section 3.10 "EDID support" says: "If the DFP monitor only supports EDID 1.X (1.1, 1.2, etc.) without extensions, the host will make the following assumptions: 1. 24-bit MSB-aligned RGB TFT 2. DE polarity is active high 3. H and V syncs are active high 4. Established CRT timings will be used 5. Dithering will not be enabled on the host" So if we don't know the bit depth of the display from additional colorimetry info we should assume 8 bpc / 24 bpp by default. This patch adds info->bpc = 8 assignement for that case. Signed-off-by: Mario Kleiner Cc: Jani Nikula Cc: Ville Syrjälä Cc: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_edid.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 2cb472b9976a..637a0aa4d3a0 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -3867,6 +3867,20 @@ static void drm_add_display_info(struct edid *edid, /* HDMI deep color modes supported? Assign to info, if so */ drm_assign_hdmi_deep_color_info(edid, info, connector); + /* + * Digital sink with "DFP 1.x compliant TMDS" according to EDID 1.3? + * + * For such displays, the DFP spec 1.0, section 3.10 "EDID support" + * tells us to assume 8 bpc color depth if the EDID doesn't have + * extensions which tell otherwise. + */ + if ((info->bpc == 0) && (edid->revision < 4) && + (edid->input & DRM_EDID_DIGITAL_TYPE_DVI)) { + info->bpc = 8; + DRM_DEBUG("%s: Assigning DFP sink color depth as %d bpc.\n", + connector->name, info->bpc); + } + /* Only defined for 1.4 with digital displays */ if (edid->revision < 4) return; -- cgit v1.2.3 From 36e9d08b58f44c3a02974c405ccaaa6ecfaf05b8 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 9 Aug 2016 02:16:18 +0200 Subject: drm/cirrus: Fix NULL pointer dereference when registering the fbdev cirrus_modeset_init() is initializing/registering the emulated fbdev and, since commit c61b93fe51b1 ("drm/atomic: Fix remaining places where !funcs->best_encoder is valid"), DRM internals can access/test some of the fields in mode_config->funcs as part of the fbdev registration process. Make sure dev->mode_config.funcs is properly set to avoid dereferencing a NULL pointer. Reported-by: Mike Marshall Reported-by: Eric W. Biederman Signed-off-by: Boris Brezillon Fixes: c61b93fe51b1 ("drm/atomic: Fix remaining places where !funcs->best_encoder is valid") Signed-off-by: Dave Airlie --- drivers/gpu/drm/cirrus/cirrus_main.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/cirrus/cirrus_main.c b/drivers/gpu/drm/cirrus/cirrus_main.c index 80446e2d3ab6..76bcb43e7c06 100644 --- a/drivers/gpu/drm/cirrus/cirrus_main.c +++ b/drivers/gpu/drm/cirrus/cirrus_main.c @@ -185,14 +185,23 @@ int cirrus_driver_load(struct drm_device *dev, unsigned long flags) goto out; } + /* + * cirrus_modeset_init() is initializing/registering the emulated fbdev + * and DRM internals can access/test some of the fields in + * mode_config->funcs as part of the fbdev registration process. + * Make sure dev->mode_config.funcs is properly set to avoid + * dereferencing a NULL pointer. + * FIXME: mode_config.funcs assignment should probably be done in + * cirrus_modeset_init() (that's a common pattern seen in other DRM + * drivers). + */ + dev->mode_config.funcs = &cirrus_mode_funcs; r = cirrus_modeset_init(cdev); if (r) { dev_err(&dev->pdev->dev, "Fatal error during modeset init: %d\n", r); goto out; } - dev->mode_config.funcs = (void *)&cirrus_mode_funcs; - return 0; out: cirrus_driver_unload(dev); -- cgit v1.2.3 From 164793379ad3b7ef5fc5a28260c111358892dff3 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Thu, 28 Jul 2016 15:39:41 +1000 Subject: cxl: Fix NULL dereference in cxl_context_init() on PowerVM guests Commit f67a6722d650 ("cxl: Workaround PE=0 hardware limitation in Mellanox CX4") added a "min_pe" field to struct cxl_service_layer_ops, to allow us to work around a Mellanox CX-4 hardware limitation. When allocating the PE number in cxl_context_init(), we read from ctx->afu->adapter->native->sl_ops->min_pe to get the minimum PE number. Unsurprisingly, in a PowerVM guest ctx->afu->adapter->native is NULL, and guests don't have a cxl_service_layer_ops struct anywhere. Move min_pe from struct cxl_service_layer_ops to struct cxl so it's accessible in both native and PowerVM environments. For the Mellanox CX-4, set the min_pe value in set_sl_ops(). Fixes: f67a6722d650 ("cxl: Workaround PE=0 hardware limitation in Mellanox CX4") Reported-by: Frederic Barrat Signed-off-by: Andrew Donnellan Acked-by: Ian Munsie Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/cxl/context.c | 3 +-- drivers/misc/cxl/cxl.h | 2 +- drivers/misc/cxl/pci.c | 3 ++- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index bdee9a01ef35..c466ee2b0c97 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -90,8 +90,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master, */ mutex_lock(&afu->contexts_lock); idr_preload(GFP_KERNEL); - i = idr_alloc(&ctx->afu->contexts_idr, ctx, - ctx->afu->adapter->native->sl_ops->min_pe, + i = idr_alloc(&ctx->afu->contexts_idr, ctx, ctx->afu->adapter->min_pe, ctx->afu->num_procs, GFP_NOWAIT); idr_preload_end(); mutex_unlock(&afu->contexts_lock); diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index de090533f18c..344a0ff8f8c7 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -561,7 +561,6 @@ struct cxl_service_layer_ops { u64 (*timebase_read)(struct cxl *adapter); int capi_mode; bool needs_reset_before_disable; - int min_pe; }; struct cxl_native { @@ -603,6 +602,7 @@ struct cxl { struct bin_attribute cxl_attr; int adapter_num; int user_irqs; + int min_pe; u64 ps_size; u16 psl_rev; u16 base_image; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index d152e2de8c93..1d0347c36e6d 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1521,14 +1521,15 @@ static const struct cxl_service_layer_ops xsl_ops = { .write_timebase_ctrl = write_timebase_ctrl_xsl, .timebase_read = timebase_read_xsl, .capi_mode = OPAL_PHB_CAPI_MODE_DMA, - .min_pe = 1, /* Workaround for Mellanox CX4 HW bug */ }; static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev) { if (dev->vendor == PCI_VENDOR_ID_MELLANOX && dev->device == 0x1013) { + /* Mellanox CX-4 */ dev_info(&adapter->dev, "Device uses an XSL\n"); adapter->native->sl_ops = &xsl_ops; + adapter->min_pe = 1; /* Workaround for CX-4 hardware bug */ } else { dev_info(&adapter->dev, "Device uses a PSL\n"); adapter->native->sl_ops = &psl_ops; -- cgit v1.2.3 From 6fd40f192a9dba391b2d84882f1ed3169c52b714 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 22 Jul 2016 19:01:36 +1000 Subject: cxl: Fix sparse warnings Make native_irq_wait() static and use NULL rather than 0 to initialise phb->cfg_data in cxl_pci_vphb_add() to remove sparse warnings. Signed-off-by: Andrew Donnellan Reviewed-by: Matthew R. Ochs Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/native.c | 2 +- drivers/misc/cxl/vphb.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 3bcdaee11ba1..e606fdc4bc9c 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -924,7 +924,7 @@ static irqreturn_t native_irq_multiplexed(int irq, void *data) return fail_psl_irq(afu, &irq_info); } -void native_irq_wait(struct cxl_context *ctx) +static void native_irq_wait(struct cxl_context *ctx) { u64 dsisr; int timeout = 1000; diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index dee8def1c193..7ada5f1b7bb6 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -221,7 +221,7 @@ int cxl_pci_vphb_add(struct cxl_afu *afu) /* Setup the PHB using arch provided callback */ phb->ops = &cxl_pcie_pci_ops; phb->cfg_addr = NULL; - phb->cfg_data = 0; + phb->cfg_data = NULL; phb->private_data = afu; phb->controller_ops = cxl_pci_controller_ops; -- cgit v1.2.3 From f3b0946d629c8bfbd3e5f038e30cb9c711a35f10 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Jul 2016 17:18:33 +0100 Subject: genirq/msi: Make sure PCI MSIs are activated early Bharat Kumar Gogada reported issues with the generic MSI code, where the end-point ended up with garbage in its MSI configuration (both for the vector and the message). It turns out that the two MSI paths in the kernel are doing slightly different things: generic MSI: disable MSI -> allocate MSI -> enable MSI -> setup EP PCI MSI: disable MSI -> allocate MSI -> setup EP -> enable MSI And it turns out that end-points are allowed to latch the content of the MSI configuration registers as soon as MSIs are enabled. In Bharat's case, the end-point ends up using whatever was there already, which is not what you want. In order to make things converge, we introduce a new MSI domain flag (MSI_FLAG_ACTIVATE_EARLY) that is unconditionally set for PCI/MSI. When set, this flag forces the programming of the end-point as soon as the MSIs are allocated. A consequence of this is that we have an extra activate in irq_startup, but that should be without much consequence. tglx: - Several people reported a VMWare regression with PCI/MSI-X passthrough. It turns out that the patch also cures that issue. - We need to have a look at the MSI disable interrupt path, where we write the msg to all zeros without disabling MSI in the PCI device. Is that correct? Fixes: 52f518a3a7c2 "x86/MSI: Use hierarchical irqdomains to manage MSI interrupts" Reported-and-tested-by: Bharat Kumar Gogada Reported-and-tested-by: Foster Snowhill Reported-by: Matthias Prager Reported-by: Jason Taylor Signed-off-by: Marc Zyngier Acked-by: Bjorn Helgaas Cc: linux-pci@vger.kernel.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1468426713-31431-1-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/pci/msi.c | 2 ++ include/linux/msi.h | 2 ++ kernel/irq/msi.c | 11 +++++++++++ 3 files changed, 15 insertions(+) (limited to 'drivers') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index a02981efdad5..eafa6138a6b8 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1411,6 +1411,8 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) pci_msi_domain_update_chip_ops(info); + info->flags |= MSI_FLAG_ACTIVATE_EARLY; + domain = msi_create_irq_domain(fwnode, info, parent); if (!domain) return NULL; diff --git a/include/linux/msi.h b/include/linux/msi.h index 4f0bfe5912b2..e8c81fbd5f9c 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -270,6 +270,8 @@ enum { MSI_FLAG_MULTI_PCI_MSI = (1 << 2), /* Support PCI MSIX interrupts */ MSI_FLAG_PCI_MSIX = (1 << 3), + /* Needs early activate, required for PCI */ + MSI_FLAG_ACTIVATE_EARLY = (1 << 4), }; int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 54999350162c..19e9dfbe97fa 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -359,6 +359,17 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, else dev_dbg(dev, "irq [%d-%d] for MSI\n", virq, virq + desc->nvec_used - 1); + /* + * This flag is set by the PCI layer as we need to activate + * the MSI entries before the PCI layer enables MSI in the + * card. Otherwise the card latches a random msi message. + */ + if (info->flags & MSI_FLAG_ACTIVATE_EARLY) { + struct irq_data *irq_data; + + irq_data = irq_domain_get_irq_data(domain, desc->irq); + irq_domain_activate_irq(irq_data); + } } return 0; -- cgit v1.2.3 From 58625edf9e2515ed41dac2a24fa8004030a87b87 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 2 Aug 2016 14:16:31 +0000 Subject: virtio: fix memory leak in virtqueue_add() When using the indirect buffers feature, 'desc' is allocated in virtqueue_add() but isn't freed before leaving on a ring full error, causing a memory leak. For example, it seems rather clear that this can trigger with virtio net if mergeable buffers are not used. Cc: stable@vger.kernel.org Signed-off-by: Wei Yongjun Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 114a0c88afb8..5ed228ddadba 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -327,6 +327,8 @@ static inline int virtqueue_add(struct virtqueue *_vq, * host should service the ring ASAP. */ if (out_sgs) vq->notify(&vq->vq); + if (indirect) + kfree(desc); END_USE(vq); return -ENOSPC; } -- cgit v1.2.3 From 3cc36f6e34bd2d92d23be7b598ba5e639c47b01a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 3 Aug 2016 07:18:51 +0300 Subject: virtio: fix error handling for debug builds On error, virtqueue_add calls START_USE but not END_USE. Thankfully that's normally empty anyway, but might not be when debugging. Fix it up. Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 5ed228ddadba..e383ecdaca59 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -428,6 +428,7 @@ unmap_release: if (indirect) kfree(desc); + END_USE(vq); return -EIO; } -- cgit v1.2.3 From 3fda5d6e580193fa005014355b3a61498f1b3ae0 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 4 Aug 2016 14:52:53 +0100 Subject: vhost/vsock: fix vhost virtio_vsock_pkt use-after-free Stash the packet length in a local variable before handing over ownership of the packet to virtio_transport_recv_pkt() or virtio_transport_free_pkt(). This patch solves the use-after-free since pkt is no longer guaranteed to be alive. Reported-by: Dan Carpenter Signed-off-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vsock.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 0ddf3a2dbfc4..e3b30ea9ece5 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -307,6 +307,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) vhost_disable_notify(&vsock->dev, vq); for (;;) { + u32 len; + if (!vhost_vsock_more_replies(vsock)) { /* Stop tx until the device processes already * pending replies. Leave tx virtqueue @@ -334,13 +336,15 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) continue; } + len = pkt->len; + /* Only accept correctly addressed packets */ if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) virtio_transport_recv_pkt(pkt); else virtio_transport_free_pkt(pkt); - vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); + vhost_add_used(vq, head, sizeof(pkt->hdr) + len); added = true; } -- cgit v1.2.3 From 347a529398e8e723338cca5d8a8ae2d9e7e93448 Mon Sep 17 00:00:00 2001 From: Minfei Huang Date: Tue, 9 Aug 2016 16:39:20 +0800 Subject: virtio_blk: Fix a slient kernel panic We do a lot of memory allocation in function init_vq, and don't handle the allocation failure properly. Then this function will return 0, although initialization fails due to lacking memory. At that moment, kernel will panic in guest machine, if virtio is used to drive disk. To fix this bug, we should take care of allocation failure, and return correct value to let caller know what happen. Tested-by: Chao Fan Signed-off-by: Minfei Huang Signed-off-by: Minfei Huang Reviewed-by: Cornelia Huck Reviewed-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1523e05c46fc..93b1aaa5ba3b 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -391,22 +391,16 @@ static int init_vq(struct virtio_blk *vblk) num_vqs = 1; vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL); - if (!vblk->vqs) { - err = -ENOMEM; - goto out; - } + if (!vblk->vqs) + return -ENOMEM; names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL); - if (!names) - goto err_names; - callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL); - if (!callbacks) - goto err_callbacks; - vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL); - if (!vqs) - goto err_vqs; + if (!names || !callbacks || !vqs) { + err = -ENOMEM; + goto out; + } for (i = 0; i < num_vqs; i++) { callbacks[i] = virtblk_done; @@ -417,7 +411,7 @@ static int init_vq(struct virtio_blk *vblk) /* Discover virtqueues and write information to configuration. */ err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names); if (err) - goto err_find_vqs; + goto out; for (i = 0; i < num_vqs; i++) { spin_lock_init(&vblk->vqs[i].lock); @@ -425,16 +419,12 @@ static int init_vq(struct virtio_blk *vblk) } vblk->num_vqs = num_vqs; - err_find_vqs: +out: kfree(vqs); - err_vqs: kfree(callbacks); - err_callbacks: kfree(names); - err_names: if (err) kfree(vblk->vqs); - out: return err; } -- cgit v1.2.3 From 2ab0d56aadbcd120b8fa524b4a1142e8b06e13c8 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 7 Jul 2016 17:07:56 +0200 Subject: virtio/s390: keep early_put_chars In case the registration of the hvc tty never happens AND the kernel thinks that hvc0 is the preferred console we should keep the early printk function to avoid a kernel panic due to code being removed. Signed-off-by: Christian Borntraeger Signed-off-by: Jing Liu Signed-off-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin --- drivers/s390/virtio/kvm_virtio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c index 1d060fd293a3..b0a849f02df3 100644 --- a/drivers/s390/virtio/kvm_virtio.c +++ b/drivers/s390/virtio/kvm_virtio.c @@ -482,7 +482,7 @@ static int __init kvm_devices_init(void) } /* code for early console output with virtio_console */ -static __init int early_put_chars(u32 vtermno, const char *buf, int count) +static int early_put_chars(u32 vtermno, const char *buf, int count) { char scratch[17]; unsigned int len = count; -- cgit v1.2.3 From 3b2fbb3f06efe5bd2dfdce2a1db703e23c1a78af Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 7 Jul 2016 17:07:57 +0200 Subject: virtio/s390: deprecate old transport There only ever have been two host implementations of the old s390-virtio (pre-ccw) transport: the experimental kuli userspace, and qemu. As qemu switched its default to ccw with 2.4 (with most users having used ccw well before that) and removed the old transport entirely in 2.6, s390-virtio probably hasn't been in active use for quite some time and is therefore likely to bitrot. Let's start the slow march towards removing the code by deprecating it. Note that this also deprecates the early virtio console code, which has been causing trouble in the guest without being wired up in any relevant hypervisor code. Acked-by: Christian Borntraeger Reviewed-by: Dong Jia Shi Reviewed-by: Sascha Silbe Signed-off-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin --- arch/s390/Kconfig | 13 +++++++++++++ drivers/s390/virtio/Makefile | 6 +++++- drivers/s390/virtio/kvm_virtio.c | 2 ++ 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9e607bf2d640..5a2907ee8c93 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -871,4 +871,17 @@ config S390_GUEST Select this option if you want to run the kernel as a guest under the KVM hypervisor. +config S390_GUEST_OLD_TRANSPORT + def_bool y + prompt "Guest support for old s390 virtio transport (DEPRECATED)" + depends on S390_GUEST + help + Enable this option to add support for the old s390-virtio + transport (i.e. virtio devices NOT based on virtio-ccw). This + type of virtio devices is only available on the experimental + kuli userspace or with old (< 2.6) qemu. If you are running + with a modern version of qemu (which supports virtio-ccw since + 1.4 and uses it by default since version 2.4), you probably won't + need this. + endmenu diff --git a/drivers/s390/virtio/Makefile b/drivers/s390/virtio/Makefile index 241891a57caf..df40692a9011 100644 --- a/drivers/s390/virtio/Makefile +++ b/drivers/s390/virtio/Makefile @@ -6,4 +6,8 @@ # it under the terms of the GNU General Public License (version 2 only) # as published by the Free Software Foundation. -obj-$(CONFIG_S390_GUEST) += kvm_virtio.o virtio_ccw.o +s390-virtio-objs := virtio_ccw.o +ifdef CONFIG_S390_GUEST_OLD_TRANSPORT +s390-virtio-objs += kvm_virtio.o +endif +obj-$(CONFIG_S390_GUEST) += $(s390-virtio-objs) diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c index b0a849f02df3..5e5c11f37b24 100644 --- a/drivers/s390/virtio/kvm_virtio.c +++ b/drivers/s390/virtio/kvm_virtio.c @@ -458,6 +458,8 @@ static int __init kvm_devices_init(void) if (test_devices_support(total_memory_size) < 0) return -ENODEV; + pr_warn("The s390-virtio transport is deprecated. Please switch to a modern host providing virtio-ccw.\n"); + rc = vmem_add_mapping(total_memory_size, PAGE_SIZE); if (rc) return rc; -- cgit v1.2.3 From d8734849d8007dacaa40b31ba7319ed28077141d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 8 Aug 2016 15:24:02 +0200 Subject: rbd: nuke the 32-bit pool id check ceph_file_layout::pool_id is now s64. rbd_add_get_pool_id() and ceph_pg_poolid_by_name() both return an int, so it's bogus anyway. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- drivers/block/rbd.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 07668a6f0607..6c6519f6492a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -5337,15 +5337,6 @@ static ssize_t do_rbd_add(struct bus_type *bus, } spec->pool_id = (u64)rc; - /* The ceph file layout needs to fit pool id in 32 bits */ - - if (spec->pool_id > (u64)U32_MAX) { - rbd_warn(NULL, "pool id too large (%llu > %u)", - (unsigned long long)spec->pool_id, U32_MAX); - rc = -EIO; - goto err_out_client; - } - rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts); if (!rbd_dev) { rc = -ENOMEM; -- cgit v1.2.3 From a026bb12cc57d758e045126a252e12e868076cb4 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 3 Aug 2016 18:08:55 +0100 Subject: drivers/perf: arm-pmu: convert arm_pmu_mutex to spinlock arm_pmu_mutex is never held long and we don't want to sleep while the lock is being held as it's executed in the context of hotplug notifiers. So it can be converted to a simple spinlock instead. Without this patch we get the following warning: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620 in_atomic(): 1, irqs_disabled(): 128, pid: 0, name: swapper/2 no locks held by swapper/2/0. irq event stamp: 381314 hardirqs last enabled at (381313): _raw_spin_unlock_irqrestore+0x7c/0x88 hardirqs last disabled at (381314): cpu_die+0x28/0x48 softirqs last enabled at (381294): _local_bh_enable+0x28/0x50 softirqs last disabled at (381293): irq_enter+0x58/0x78 CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.7.0 #12 Call trace: dump_backtrace+0x0/0x220 show_stack+0x24/0x30 dump_stack+0xb4/0xf0 ___might_sleep+0x1d8/0x1f0 __might_sleep+0x5c/0x98 mutex_lock_nested+0x54/0x400 arm_perf_starting_cpu+0x34/0xb0 cpuhp_invoke_callback+0x88/0x3d8 notify_cpu_starting+0x78/0x98 secondary_start_kernel+0x108/0x1a8 This patch converts the mutex to spinlock to eliminate the above warnings. This constraints pmu->reset to be non-blocking call which is the case with all the ARM PMU backends. Cc: Stephen Boyd Fixes: 37b502f121ad ("arm/perf: Fix hotplug state machine conversion") Acked-by: Mark Rutland Signed-off-by: Sudeep Holla Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 6ccb994bdfcb..4c9a537a1265 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -688,7 +688,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) return 0; } -static DEFINE_MUTEX(arm_pmu_mutex); +static DEFINE_SPINLOCK(arm_pmu_lock); static LIST_HEAD(arm_pmu_list); /* @@ -701,7 +701,7 @@ static int arm_perf_starting_cpu(unsigned int cpu) { struct arm_pmu *pmu; - mutex_lock(&arm_pmu_mutex); + spin_lock(&arm_pmu_lock); list_for_each_entry(pmu, &arm_pmu_list, entry) { if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) @@ -709,7 +709,7 @@ static int arm_perf_starting_cpu(unsigned int cpu) if (pmu->reset) pmu->reset(pmu); } - mutex_unlock(&arm_pmu_mutex); + spin_unlock(&arm_pmu_lock); return 0; } @@ -821,9 +821,9 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu) if (!cpu_hw_events) return -ENOMEM; - mutex_lock(&arm_pmu_mutex); + spin_lock(&arm_pmu_lock); list_add_tail(&cpu_pmu->entry, &arm_pmu_list); - mutex_unlock(&arm_pmu_mutex); + spin_unlock(&arm_pmu_lock); err = cpu_pm_pmu_register(cpu_pmu); if (err) @@ -859,9 +859,9 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu) return 0; out_unregister: - mutex_lock(&arm_pmu_mutex); + spin_lock(&arm_pmu_lock); list_del(&cpu_pmu->entry); - mutex_unlock(&arm_pmu_mutex); + spin_unlock(&arm_pmu_lock); free_percpu(cpu_hw_events); return err; } @@ -869,9 +869,9 @@ out_unregister: static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) { cpu_pm_pmu_unregister(cpu_pmu); - mutex_lock(&arm_pmu_mutex); + spin_lock(&arm_pmu_lock); list_del(&cpu_pmu->entry); - mutex_unlock(&arm_pmu_mutex); + spin_unlock(&arm_pmu_lock); free_percpu(cpu_pmu->hw_events); } -- cgit v1.2.3 From 7f1d642fbb5c356519617c24757a0cbed7f800a8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Jul 2016 15:39:02 +0100 Subject: drivers/perf: arm-pmu: Fix handling of SPI lacking "interrupt-affinity" property Patch 19a469a58720 ("drivers/perf: arm-pmu: Handle per-interrupt affinity mask") added support for partitionned PPI setups, but inadvertently broke setups using SPIs without the "interrupt-affinity" property (which is the case for UP platforms). This patch restore the broken functionnality by testing whether the interrupt is percpu or not instead of relying on the using_spi flag that really means "SPI *and* interrupt-affinity property". Acked-by: Mark Rutland Reported-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Fixes: 19a469a58720 ("drivers/perf: arm-pmu: Handle per-interrupt affinity mask") Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 4c9a537a1265..c494613c1909 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -967,11 +967,12 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) /* If we didn't manage to parse anything, try the interrupt affinity */ if (cpumask_weight(&pmu->supported_cpus) == 0) { - if (!using_spi) { + int irq = platform_get_irq(pdev, 0); + + if (irq_is_percpu(irq)) { /* If using PPIs, check the affinity of the partition */ - int ret, irq; + int ret; - irq = platform_get_irq(pdev, 0); ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus); if (ret) { kfree(irqs); -- cgit v1.2.3 From a3d1ddd932bc86f0f7027079754ab0aefd259109 Mon Sep 17 00:00:00 2001 From: Brian King Date: Mon, 8 Aug 2016 17:53:12 -0500 Subject: ipr: Fix sync scsi scan Commit b195d5e2bffd ("ipr: Wait to do async scan until scsi host is initialized") fixed async scan for ipr, but broke sync scan for ipr. This fixes sync scan back up. Signed-off-by: Brian King Reported-and-tested-by: Michael Ellerman Signed-off-by: Linus Torvalds --- drivers/scsi/ipr.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index bf85974be862..17d04c702e1b 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -10410,8 +10410,11 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) __ipr_remove(pdev); return rc; } + spin_lock_irqsave(ioa_cfg->host->host_lock, flags); + ioa_cfg->scan_enabled = 1; + schedule_work(&ioa_cfg->work_q); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); - scsi_scan_host(ioa_cfg->host); ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight; if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { @@ -10421,10 +10424,8 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) } } - spin_lock_irqsave(ioa_cfg->host->host_lock, flags); - ioa_cfg->scan_enabled = 1; - schedule_work(&ioa_cfg->work_q); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); + scsi_scan_host(ioa_cfg->host); + return 0; } -- cgit v1.2.3 From c6d2ee09c2fffd3efdd31be2b2811d081a45bb99 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Mon, 8 Aug 2016 11:57:48 +0200 Subject: cxl: Set psl_fir_cntl to production environment value Switch the setting of psl_fir_cntl from debug to production environment recommended value. It mostly affects the PSL behavior when an error is raised in psl_fir1/2. Tested with cxlflash. Signed-off-by: Frederic Barrat Reviewed-by: Uma Krishnan Signed-off-by: Michael Ellerman --- drivers/misc/cxl/pci.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 1d0347c36e6d..6f0c4ac4b649 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -379,7 +379,7 @@ static int calc_capp_routing(struct pci_dev *dev, u64 *chipid, u64 *capp_unit_id static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_dev *dev) { - u64 psl_dsnctl; + u64 psl_dsnctl, psl_fircntl; u64 chipid; u64 capp_unit_id; int rc; @@ -398,8 +398,11 @@ static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_ cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL); /* snoop write mask */ cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL); - /* set fir_accum */ - cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, 0x0800000000000000ULL); + /* set fir_cntl to recommended value for production env */ + psl_fircntl = (0x2ULL << (63-3)); /* ce_report */ + psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */ + psl_fircntl |= 0x1ULL; /* ce_thresh */ + cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, psl_fircntl); /* for debugging with trace arrays */ cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL); -- cgit v1.2.3 From 73984137d32bb6e48646daea60ebe1457a01a061 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Aug 2016 16:27:38 -0700 Subject: rapidio: dereferencing an error pointer Original patch: https://lkml.org/lkml/2016/8/4/32 If riocm_ch_alloc() fails then we end up dereferencing the error pointer. The problem is that we're not unwinding in the reverse order from how we allocate things so it gets confusing. I've changed this around so now "ch" is NULL when we are done with it after we call riocm_put_channel(). That way we can check if it's NULL and avoid calling riocm_put_channel() on it twice. I renamed err_nodev to err_put_new_ch so that it better reflects what the goto does. Then because we had flipping things around, it means we don't neeed to initialize the pointers to NULL and we can remove an if statement and pull things in an indent level. Link: http://lkml.kernel.org/r/20160805152406.20713-1-alexandre.bounine@idt.com Signed-off-by: Dan Carpenter Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Andre van Herk Cc: Barry Wood Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio_cm.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c index cecc15a880de..3fa17ac8df54 100644 --- a/drivers/rapidio/rio_cm.c +++ b/drivers/rapidio/rio_cm.c @@ -1080,8 +1080,8 @@ static int riocm_send_ack(struct rio_channel *ch) static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id, long timeout) { - struct rio_channel *ch = NULL; - struct rio_channel *new_ch = NULL; + struct rio_channel *ch; + struct rio_channel *new_ch; struct conn_req *req; struct cm_peer *peer; int found = 0; @@ -1155,6 +1155,7 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id, spin_unlock_bh(&ch->lock); riocm_put_channel(ch); + ch = NULL; kfree(req); down_read(&rdev_sem); @@ -1172,7 +1173,7 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id, if (!found) { /* If peer device object not found, simply ignore the request */ err = -ENODEV; - goto err_nodev; + goto err_put_new_ch; } new_ch->rdev = peer->rdev; @@ -1184,15 +1185,16 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id, *new_ch_id = new_ch->id; return new_ch; + +err_put_new_ch: + spin_lock_bh(&idr_lock); + idr_remove(&ch_idr, new_ch->id); + spin_unlock_bh(&idr_lock); + riocm_put_channel(new_ch); + err_put: - riocm_put_channel(ch); -err_nodev: - if (new_ch) { - spin_lock_bh(&idr_lock); - idr_remove(&ch_idr, new_ch->id); - spin_unlock_bh(&idr_lock); - riocm_put_channel(new_ch); - } + if (ch) + riocm_put_channel(ch); *new_ch_id = 0; return ERR_PTR(err); } -- cgit v1.2.3 From 6862e6ad95e984991a6ceec592cf67831658f928 Mon Sep 17 00:00:00 2001 From: Austin Christ Date: Thu, 11 Aug 2016 11:42:00 +0100 Subject: efi/capsule: Allocate whole capsule into virtual memory According to UEFI 2.6 section 7.5.3, the capsule should be in contiguous virtual memory and firmware may consume the capsule immediately. To correctly implement this functionality, the kernel driver needs to vmap the entire capsule at the time it is made available to firmware. The virtual allocation of the capsule update has been changed from kmap, which was only allocating the first page of the update, to vmap, and allocates the entire data payload. Signed-off-by: Austin Christ Signed-off-by: Matt Fleming Reviewed-by: Matt Fleming Reviewed-by: Lee, Chun-Yi Cc: # v4.7 Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Bryan O'Donoghue Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Kweh Hock Leong Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1470912120-22831-3-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- drivers/firmware/efi/capsule-loader.c | 8 +++++--- drivers/firmware/efi/capsule.c | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index c99c24bc79b0..9ae6c116c474 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -16,6 +16,7 @@ #include #include #include +#include #define NO_FURTHER_WRITE_ACTION -1 @@ -108,14 +109,15 @@ static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info) int ret; void *cap_hdr_temp; - cap_hdr_temp = kmap(cap_info->pages[0]); + cap_hdr_temp = vmap(cap_info->pages, cap_info->index, + VM_MAP, PAGE_KERNEL); if (!cap_hdr_temp) { - pr_debug("%s: kmap() failed\n", __func__); + pr_debug("%s: vmap() failed\n", __func__); return -EFAULT; } ret = efi_capsule_update(cap_hdr_temp, cap_info->pages); - kunmap(cap_info->pages[0]); + vunmap(cap_hdr_temp); if (ret) { pr_err("%s: efi_capsule_update() failed\n", __func__); return ret; diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c index 53b9fd2293ee..6eedff45e6d7 100644 --- a/drivers/firmware/efi/capsule.c +++ b/drivers/firmware/efi/capsule.c @@ -190,9 +190,9 @@ efi_capsule_update_locked(efi_capsule_header_t *capsule, * map the capsule described by @capsule with its data in @pages and * send it to the firmware via the UpdateCapsule() runtime service. * - * @capsule must be a virtual mapping of the first page in @pages - * (@pages[0]) in the kernel address space. That is, a - * capsule_header_t that describes the entire contents of the capsule + * @capsule must be a virtual mapping of the complete capsule update in the + * kernel address space, as the capsule can be consumed immediately. + * A capsule_header_t that describes the entire contents of the capsule * must be at the start of the first data page. * * Even though this function will validate that the firmware supports -- cgit v1.2.3 From c21377f8366c95440d533edbe47d070f662c62ef Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 11 Aug 2016 09:35:57 -0600 Subject: nvme: Suspend all queues before deletion When nvme_delete_queue fails in the first pass of the nvme_disable_io_queues() loop, we return early, failing to suspend all of the IO queues. Later, on the nvme_pci_disable path, this causes us to disable MSI without actually having freed all the IRQs, which triggers the BUG_ON in free_msi_irqs(), as show below. This patch refactors nvme_disable_io_queues to suspend all queues before start submitting delete queue commands. This way, we ensure that we have at least returned every IRQ before continuing with the removal path. [ 487.529200] kernel BUG at ../drivers/pci/msi.c:368! cpu 0x46: Vector: 700 (Program Check) at [c0000078c5b83650] pc: c000000000627a50: free_msi_irqs+0x90/0x200 lr: c000000000627a40: free_msi_irqs+0x80/0x200 sp: c0000078c5b838d0 msr: 9000000100029033 current = 0xc0000078c5b40000 paca = 0xc000000002bd7600 softe: 0 irq_happened: 0x01 pid = 1376, comm = kworker/70:1H kernel BUG at ../drivers/pci/msi.c:368! Linux version 4.7.0.mainline+ (root@iod76) (gcc version 5.3.1 20160413 (Ubuntu/IBM 5.3.1-14ubuntu2.1) ) #104 SMP Fri Jul 29 09:20:17 CDT 2016 enter ? for help [c0000078c5b83920] d0000000363b0cd8 nvme_dev_disable+0x208/0x4f0 [nvme] [c0000078c5b83a10] d0000000363b12a4 nvme_timeout+0xe4/0x250 [nvme] [c0000078c5b83ad0] c0000000005690e4 blk_mq_rq_timed_out+0x64/0x110 [c0000078c5b83b40] c00000000056c930 bt_for_each+0x160/0x170 [c0000078c5b83bb0] c00000000056d928 blk_mq_queue_tag_busy_iter+0x78/0x110 [c0000078c5b83c00] c0000000005675d8 blk_mq_timeout_work+0xd8/0x1b0 [c0000078c5b83c50] c0000000000e8cf0 process_one_work+0x1e0/0x590 [c0000078c5b83ce0] c0000000000e9148 worker_thread+0xa8/0x660 [c0000078c5b83d80] c0000000000f2090 kthread+0x110/0x130 [c0000078c5b83e30] c0000000000095f0 ret_from_kernel_thread+0x5c/0x6c Signed-off-by: Gabriel Krisman Bertazi Cc: Brian King Cc: Keith Busch Cc: linux-nvme@lists.infradead.org Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d7c33f9361aa..8dcf5a960951 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1543,15 +1543,10 @@ static void nvme_disable_io_queues(struct nvme_dev *dev) reinit_completion(&dev->ioq_wait); retry: timeout = ADMIN_TIMEOUT; - for (; i > 0; i--) { - struct nvme_queue *nvmeq = dev->queues[i]; - - if (!pass) - nvme_suspend_queue(nvmeq); - if (nvme_delete_queue(nvmeq, opcode)) + for (; i > 0; i--, sent++) + if (nvme_delete_queue(dev->queues[i], opcode)) break; - ++sent; - } + while (sent--) { timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout); if (timeout == 0) @@ -1693,11 +1688,12 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_stop_queues(&dev->ctrl); csts = readl(dev->bar + NVME_REG_CSTS); } + + for (i = dev->queue_count - 1; i > 0; i--) + nvme_suspend_queue(dev->queues[i]); + if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) { - for (i = dev->queue_count - 1; i >= 0; i--) { - struct nvme_queue *nvmeq = dev->queues[i]; - nvme_suspend_queue(nvmeq); - } + nvme_suspend_queue(dev->queues[0]); } else { nvme_disable_io_queues(dev); nvme_disable_admin_queue(dev, shutdown); -- cgit v1.2.3