From 26047e2d6bde5b2e1b791e0ec1c3234894fdf3fa Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Fri, 30 Nov 2012 11:28:55 +0100 Subject: ASoC: cs4271: fix sparse warning Make the flag in the pdata of type bool to fix a sparse warning. Signed-off-by: Daniel Mack Reported-by: Fengguang Wu Signed-off-by: Mark Brown --- include/sound/cs4271.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/cs4271.h b/include/sound/cs4271.h index 6d9e15ed1dcf..dd8c48d14ed9 100644 --- a/include/sound/cs4271.h +++ b/include/sound/cs4271.h @@ -19,7 +19,7 @@ struct cs4271_platform_data { int gpio_nreset; /* GPIO driving Reset pin, if any */ - int amutec_eq_bmutec:1; /* flag to enable AMUTEC=BMUTEC */ + bool amutec_eq_bmutec; /* flag to enable AMUTEC=BMUTEC */ }; #endif /* __CS4271_H */ -- cgit v1.2.3 From 9bde4f0b1c83d1129a9fc8ec5b2611ba6dab1215 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 19 Dec 2012 16:05:00 +0000 Subject: ASoC: core: Fix SOC_DOUBLE_RANGE() macros Although we've had macros defining double _RANGE controls for a while now they've not actually been backed up properly by the implementation, it's treated everything as mono. Fix that by implementing the handling in the stereo controls, ensuring that the mono controls don't mistakenly get treated as stereo. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- include/sound/soc.h | 10 ++++++---- sound/soc/soc-core.c | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 769e27c774a3..bc56738cb109 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -58,8 +58,9 @@ .info = snd_soc_info_volsw_range, .get = snd_soc_get_volsw_range, \ .put = snd_soc_put_volsw_range, \ .private_value = (unsigned long)&(struct soc_mixer_control) \ - {.reg = xreg, .shift = xshift, .min = xmin,\ - .max = xmax, .platform_max = xmax, .invert = xinvert} } + {.reg = xreg, .rreg = xreg, .shift = xshift, \ + .rshift = xshift, .min = xmin, .max = xmax, \ + .platform_max = xmax, .invert = xinvert} } #define SOC_SINGLE_TLV(xname, reg, shift, max, invert, tlv_array) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ .access = SNDRV_CTL_ELEM_ACCESS_TLV_READ |\ @@ -88,8 +89,9 @@ .info = snd_soc_info_volsw_range, \ .get = snd_soc_get_volsw_range, .put = snd_soc_put_volsw_range, \ .private_value = (unsigned long)&(struct soc_mixer_control) \ - {.reg = xreg, .shift = xshift, .min = xmin,\ - .max = xmax, .platform_max = xmax, .invert = xinvert} } + {.reg = xreg, .rreg = xreg, .shift = xshift, \ + .rshift = xshift, .min = xmin, .max = xmax, \ + .platform_max = xmax, .invert = xinvert} } #define SOC_DOUBLE(xname, reg, shift_left, shift_right, max, invert) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname),\ .info = snd_soc_info_volsw, .get = snd_soc_get_volsw, \ diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 91d592ff67b7..e0d4630bfb4f 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2917,7 +2917,7 @@ int snd_soc_info_volsw_range(struct snd_kcontrol *kcontrol, platform_max = mc->platform_max; uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; - uinfo->count = 1; + uinfo->count = snd_soc_volsw_is_stereo(mc) ? 2 : 1; uinfo->value.integer.min = 0; uinfo->value.integer.max = platform_max - min; @@ -2941,12 +2941,14 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, (struct soc_mixer_control *)kcontrol->private_value; struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); unsigned int reg = mc->reg; + unsigned int rreg = mc->rreg; unsigned int shift = mc->shift; int min = mc->min; int max = mc->max; unsigned int mask = (1 << fls(max)) - 1; unsigned int invert = mc->invert; unsigned int val, val_mask; + int ret; val = ((ucontrol->value.integer.value[0] + min) & mask); if (invert) @@ -2954,7 +2956,21 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, val_mask = mask << shift; val = val << shift; - return snd_soc_update_bits_locked(codec, reg, val_mask, val); + ret = snd_soc_update_bits_locked(codec, reg, val_mask, val); + if (ret != 0) + return ret; + + if (snd_soc_volsw_is_stereo(mc)) { + val = ((ucontrol->value.integer.value[1] + min) & mask); + if (invert) + val = max - val; + val_mask = mask << shift; + val = val << shift; + + ret = snd_soc_update_bits_locked(codec, rreg, val_mask, val); + } + + return ret; } EXPORT_SYMBOL_GPL(snd_soc_put_volsw_range); @@ -2974,11 +2990,13 @@ int snd_soc_get_volsw_range(struct snd_kcontrol *kcontrol, (struct soc_mixer_control *)kcontrol->private_value; struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); unsigned int reg = mc->reg; + unsigned int rreg = mc->rreg; unsigned int shift = mc->shift; int min = mc->min; int max = mc->max; unsigned int mask = (1 << fls(max)) - 1; unsigned int invert = mc->invert; + int ret; ucontrol->value.integer.value[0] = (snd_soc_read(codec, reg) >> shift) & mask; @@ -2988,6 +3006,16 @@ int snd_soc_get_volsw_range(struct snd_kcontrol *kcontrol, ucontrol->value.integer.value[0] = ucontrol->value.integer.value[0] - min; + if (snd_soc_volsw_is_stereo(mc)) { + ucontrol->value.integer.value[1] = + (snd_soc_read(codec, rreg) >> shift) & mask; + if (invert) + ucontrol->value.integer.value[1] = + max - ucontrol->value.integer.value[1]; + ucontrol->value.integer.value[1] = + ucontrol->value.integer.value[1] - min; + } + return 0; } EXPORT_SYMBOL_GPL(snd_soc_get_volsw_range); -- cgit v1.2.3 From aaf4f97f267aa0dbc42f089b33325e61aac8622e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 16 Dec 2012 18:27:33 +0100 Subject: asm-generic/dma-mapping-broken.h: Provide dma_alloc_attrs()/dma_free_attrs() Since commit 0049fb2603b7afb1080776ee691dfa5a3d282357 ("OMAPFB: use dma_alloc_attrs to allocate memory") we have one non-arch user of dma_{alloc,free}_attrs(). Hence provide these functions, as wrappers around dma_{alloc,free}_coherent(). Note that most architectures do it the other way around. But as these are dummy functions, we don't care. Signed-off-by: Geert Uytterhoeven Acked-by: Arnd Bergmann --- include/asm-generic/dma-mapping-broken.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/asm-generic/dma-mapping-broken.h b/include/asm-generic/dma-mapping-broken.h index ccf7b4f34a3c..6c32af918c2f 100644 --- a/include/asm-generic/dma-mapping-broken.h +++ b/include/asm-generic/dma-mapping-broken.h @@ -16,6 +16,22 @@ extern void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle); +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + /* attrs is not supported and ignored */ + return dma_alloc_coherent(dev, size, dma_handle, flag); +} + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + /* attrs is not supported and ignored */ + dma_free_coherent(dev, size, cpu_addr, dma_handle); +} + #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) -- cgit v1.2.3 From f13a3664e4d1de8adc1fc82b981ba4699a731fa1 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 24 Dec 2012 10:51:36 +0530 Subject: CONFIG_GENERIC_SIGALTSTACK build breakage with asm-generic/syscalls.h Saner transition plan for GENERIC_SIGALTSTACK conversion - instead of adding #define sys_sigaltstack sys_sigaltstack in asm/syscalls.h of architecture if it's pulls asm-generic/syscalls.h, only to have those defines removed once all architectures are converted, make the declaration in said asm-generic/syscalls.h conditional on the lack of GENERIC_SIGALTSTACK. Less messy in intermediate stages that way... Signed-off-by: Vineet Gupta Acked-by: Al Viro Cc: james.hogan@imgtec.com Cc: arnd@arndb.de Cc: torvalds@linux-foundation.org Cc: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org Signed-off-by: Al Viro --- include/asm-generic/syscalls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h index 58f466ff00d3..1db51b8524e9 100644 --- a/include/asm-generic/syscalls.h +++ b/include/asm-generic/syscalls.h @@ -21,10 +21,12 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long fd, off_t pgoff); #endif +#ifndef CONFIG_GENERIC_SIGALTSTACK #ifndef sys_sigaltstack asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, struct pt_regs *); #endif +#endif #ifndef sys_rt_sigreturn asmlinkage long sys_rt_sigreturn(struct pt_regs *regs); -- cgit v1.2.3 From e909c682d04e55e77a3c9d158e7dc36027195493 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 7 Jan 2013 10:57:14 +0100 Subject: [media] coda: Fix build due to iram.h rename commit c045e3f13 (ARM: imx: include iram.h rather than mach/iram.h) changed the location of iram.h, which causes the following build error when building the coda driver: drivers/media/platform/coda.c:27:23: error: mach/iram.h: No such file or directory drivers/media/platform/coda.c: In function 'coda_probe': drivers/media/platform/coda.c:2000: error: implicit declaration of function 'iram_alloc' drivers/media/platform/coda.c:2001: warning: assignment makes pointer from integer without a cast drivers/media/platform/coda.c: In function 'coda_remove': drivers/media/platform/coda.c:2024: error: implicit declaration of function 'iram_free' Since the content of iram.h is not imx specific, move it to include/linux/platform_data/imx-iram.h instead. This is an intermediate solution until the i.MX iram allocator is converted to the generic SRAM allocator. Signed-off-by: Sascha Hauer Acked-by: Mauro Carvalho Chehab --- arch/arm/mach-imx/iram.h | 41 ---------------------------------- arch/arm/mach-imx/iram_alloc.c | 3 +-- drivers/media/platform/coda.c | 2 +- include/linux/platform_data/imx-iram.h | 41 ++++++++++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 44 deletions(-) delete mode 100644 arch/arm/mach-imx/iram.h create mode 100644 include/linux/platform_data/imx-iram.h (limited to 'include') diff --git a/arch/arm/mach-imx/iram.h b/arch/arm/mach-imx/iram.h deleted file mode 100644 index 022690c33702..000000000000 --- a/arch/arm/mach-imx/iram.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2010 Freescale Semiconductor, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301, USA. - */ -#include - -#ifdef CONFIG_IRAM_ALLOC - -int __init iram_init(unsigned long base, unsigned long size); -void __iomem *iram_alloc(unsigned int size, unsigned long *dma_addr); -void iram_free(unsigned long dma_addr, unsigned int size); - -#else - -static inline int __init iram_init(unsigned long base, unsigned long size) -{ - return -ENOMEM; -} - -static inline void __iomem *iram_alloc(unsigned int size, unsigned long *dma_addr) -{ - return NULL; -} - -static inline void iram_free(unsigned long base, unsigned long size) {} - -#endif diff --git a/arch/arm/mach-imx/iram_alloc.c b/arch/arm/mach-imx/iram_alloc.c index 6c80424f678e..e05cf407db65 100644 --- a/arch/arm/mach-imx/iram_alloc.c +++ b/arch/arm/mach-imx/iram_alloc.c @@ -22,8 +22,7 @@ #include #include #include - -#include "iram.h" +#include "linux/platform_data/imx-iram.h" static unsigned long iram_phys_base; static void __iomem *iram_virt_base; diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c index 7b8b547f2d51..afadd3aba3e0 100644 --- a/drivers/media/platform/coda.c +++ b/drivers/media/platform/coda.c @@ -23,8 +23,8 @@ #include #include #include +#include -#include #include #include #include diff --git a/include/linux/platform_data/imx-iram.h b/include/linux/platform_data/imx-iram.h new file mode 100644 index 000000000000..022690c33702 --- /dev/null +++ b/include/linux/platform_data/imx-iram.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2010 Freescale Semiconductor, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ +#include + +#ifdef CONFIG_IRAM_ALLOC + +int __init iram_init(unsigned long base, unsigned long size); +void __iomem *iram_alloc(unsigned int size, unsigned long *dma_addr); +void iram_free(unsigned long dma_addr, unsigned int size); + +#else + +static inline int __init iram_init(unsigned long base, unsigned long size) +{ + return -ENOMEM; +} + +static inline void __iomem *iram_alloc(unsigned int size, unsigned long *dma_addr) +{ + return NULL; +} + +static inline void iram_free(unsigned long base, unsigned long size) {} + +#endif -- cgit v1.2.3 From 901593f2bf221659a605bdc1dcb11376ea934163 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 19 Dec 2012 16:51:06 +0000 Subject: drm: Only evict the blocks required to create the requested hole Avoid clobbering adjacent blocks if they happen to expire earlier and amalgamate together to form the requested hole. In passing this fixes a regression from commit ea7b1dd44867e9cd6bac67e7c9fc3f128b5b255c Author: Daniel Vetter Date: Fri Feb 18 17:59:12 2011 +0100 drm: mm: track free areas implicitly which swaps the end address for size (with a potential overflow) and effectively causes the eviction code to clobber almost all earlier buffers above the evictee. v2: Check the original hole not the adjusted as the coloring may confuse us when later searching for the overlapping nodes. Also make sure that we do apply the range restriction and color adjustment in the same order for both scanning, searching and insertion. v3: Send the version that was actually tested. Note that this seems to be ducttape of decent quality ot paper over some of our unbind related gpu hangs reported since 3.7. It is not fully effective though, and certainly doesn't fix the underlying bug. Signed-off-by: Chris Wilson Cc: Daniel Vetter [danvet: Added note plus bugzilla link and tested-by.] Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=55984 Tested-by: Norbert Preining Acked-by: Dave Airlie --- drivers/gpu/drm/drm_mm.c | 45 +++++++++++++++++---------------------------- include/drm/drm_mm.h | 2 +- 2 files changed, 18 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 2bf9670ba29b..2aa331499f81 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -221,11 +221,13 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node, BUG_ON(!hole_node->hole_follows || node->allocated); - if (mm->color_adjust) - mm->color_adjust(hole_node, color, &adj_start, &adj_end); - if (adj_start < start) adj_start = start; + if (adj_end > end) + adj_end = end; + + if (mm->color_adjust) + mm->color_adjust(hole_node, color, &adj_start, &adj_end); if (alignment) { unsigned tmp = adj_start % alignment; @@ -506,7 +508,7 @@ void drm_mm_init_scan(struct drm_mm *mm, mm->scan_size = size; mm->scanned_blocks = 0; mm->scan_hit_start = 0; - mm->scan_hit_size = 0; + mm->scan_hit_end = 0; mm->scan_check_range = 0; mm->prev_scanned_node = NULL; } @@ -533,7 +535,7 @@ void drm_mm_init_scan_with_range(struct drm_mm *mm, mm->scan_size = size; mm->scanned_blocks = 0; mm->scan_hit_start = 0; - mm->scan_hit_size = 0; + mm->scan_hit_end = 0; mm->scan_start = start; mm->scan_end = end; mm->scan_check_range = 1; @@ -552,8 +554,7 @@ int drm_mm_scan_add_block(struct drm_mm_node *node) struct drm_mm *mm = node->mm; struct drm_mm_node *prev_node; unsigned long hole_start, hole_end; - unsigned long adj_start; - unsigned long adj_end; + unsigned long adj_start, adj_end; mm->scanned_blocks++; @@ -570,14 +571,8 @@ int drm_mm_scan_add_block(struct drm_mm_node *node) node->node_list.next = &mm->prev_scanned_node->node_list; mm->prev_scanned_node = node; - hole_start = drm_mm_hole_node_start(prev_node); - hole_end = drm_mm_hole_node_end(prev_node); - - adj_start = hole_start; - adj_end = hole_end; - - if (mm->color_adjust) - mm->color_adjust(prev_node, mm->scan_color, &adj_start, &adj_end); + adj_start = hole_start = drm_mm_hole_node_start(prev_node); + adj_end = hole_end = drm_mm_hole_node_end(prev_node); if (mm->scan_check_range) { if (adj_start < mm->scan_start) @@ -586,11 +581,14 @@ int drm_mm_scan_add_block(struct drm_mm_node *node) adj_end = mm->scan_end; } + if (mm->color_adjust) + mm->color_adjust(prev_node, mm->scan_color, + &adj_start, &adj_end); + if (check_free_hole(adj_start, adj_end, mm->scan_size, mm->scan_alignment)) { mm->scan_hit_start = hole_start; - mm->scan_hit_size = hole_end; - + mm->scan_hit_end = hole_end; return 1; } @@ -626,19 +624,10 @@ int drm_mm_scan_remove_block(struct drm_mm_node *node) node_list); prev_node->hole_follows = node->scanned_preceeds_hole; - INIT_LIST_HEAD(&node->node_list); list_add(&node->node_list, &prev_node->node_list); - /* Only need to check for containement because start&size for the - * complete resulting free block (not just the desired part) is - * stored. */ - if (node->start >= mm->scan_hit_start && - node->start + node->size - <= mm->scan_hit_start + mm->scan_hit_size) { - return 1; - } - - return 0; + return (drm_mm_hole_node_end(node) > mm->scan_hit_start && + node->start < mm->scan_hit_end); } EXPORT_SYMBOL(drm_mm_scan_remove_block); diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index 0f4a366f6fa6..3527fb3f75bb 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -70,7 +70,7 @@ struct drm_mm { unsigned long scan_color; unsigned long scan_size; unsigned long scan_hit_start; - unsigned scan_hit_size; + unsigned long scan_hit_end; unsigned scanned_blocks; unsigned long scan_start; unsigned long scan_end; -- cgit v1.2.3 From 54b956b903607f8f8878754dd4352da6a54a1da2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 10 Jan 2013 10:57:01 -0800 Subject: Remove __dev* markings from init.h Now that all in-kernel users of __dev* are gone, let's remove them from init.h to keep them from popping up again and again. Thanks to Bill Pemberton for doing all of the hard work to make removal of this possible. Cc: Bill Pemberton Cc: Stephen Rothwell Signed-off-by: Greg Kroah-Hartman --- include/linux/init.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/init.h b/include/linux/init.h index a799273714ac..10ed4f436458 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -93,14 +93,6 @@ #define __exit __section(.exit.text) __exitused __cold notrace -/* Used for HOTPLUG, but that is always enabled now, so just make them noops */ -#define __devinit -#define __devinitdata -#define __devinitconst -#define __devexit -#define __devexitdata -#define __devexitconst - /* Used for HOTPLUG_CPU */ #define __cpuinit __section(.cpuinit.text) __cold notrace #define __cpuinitdata __section(.cpuinit.data) @@ -337,18 +329,6 @@ void __init parse_early_options(char *cmdline); #define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ -/* Functions marked as __devexit may be discarded at kernel link time, depending - on config options. Newer versions of binutils detect references from - retained sections to discarded sections and flag an error. Pointers to - __devexit functions must use __devexit_p(function_name), the wrapper will - insert either the function_name or NULL, depending on the config options. - */ -#if defined(MODULE) || defined(CONFIG_HOTPLUG) -#define __devexit_p(x) x -#else -#define __devexit_p(x) NULL -#endif - #ifdef MODULE #define __exit_p(x) x #else -- cgit v1.2.3 From ba829137bfd167623363548aa385be769c6b2664 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 17 Dec 2012 09:53:33 +0100 Subject: target: Introduce TCM_NO_SENSE Introduce TCM_NO_SENSE, mapping to sense code 'Not ready, no additional sense information'. Signed-off-by: Hannes Reinecke Cc: Nicholas Bellinger Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 10 ++++++++++ include/target/target_core_base.h | 1 + 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 978762dc81bf..104bf3b45a01 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2597,6 +2597,16 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, * SENSE KEY values from include/scsi/scsi.h */ switch (reason) { + case TCM_NO_SENSE: + /* CURRENT ERROR */ + buffer[0] = 0x70; + buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; + /* Not Ready */ + buffer[SPC_SENSE_KEY_OFFSET] = NOT_READY; + /* NO ADDITIONAL SENSE INFORMATION */ + buffer[SPC_ASC_KEY_OFFSET] = 0; + buffer[SPC_ASCQ_KEY_OFFSET] = 0; + break; case TCM_NON_EXISTENT_LUN: /* CURRENT ERROR */ buffer[0] = 0x70; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 7cae2360221e..663e34a5383f 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -174,6 +174,7 @@ typedef unsigned __bitwise__ sense_reason_t; enum tcm_sense_reason_table { #define R(x) (__force sense_reason_t )(x) + TCM_NO_SENSE = R(0x00), TCM_NON_EXISTENT_LUN = R(0x01), TCM_UNSUPPORTED_SCSI_OPCODE = R(0x02), TCM_INCORRECT_AMOUNT_OF_DATA = R(0x03), -- cgit v1.2.3 From 47ecfcb7d01418fcbfbc75183ba5e28e98b667b2 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 11 Jan 2013 09:27:01 +0000 Subject: mm: compaction: Partially revert capture of suitable high-order page Eric Wong reported on 3.7 and 3.8-rc2 that ppoll() got stuck when waiting for POLLIN on a local TCP socket. It was easier to trigger if there was disk IO and dirty pages at the same time and he bisected it to commit 1fb3f8ca0e92 ("mm: compaction: capture a suitable high-order page immediately when it is made available"). The intention of that patch was to improve high-order allocations under memory pressure after changes made to reclaim in 3.6 drastically hurt THP allocations but the approach was flawed. For Eric, the problem was that page->pfmemalloc was not being cleared for captured pages leading to a poor interaction with swap-over-NFS support causing the packets to be dropped. However, I identified a few more problems with the patch including the fact that it can increase contention on zone->lock in some cases which could result in async direct compaction being aborted early. In retrospect the capture patch took the wrong approach. What it should have done is mark the pageblock being migrated as MIGRATE_ISOLATE if it was allocating for THP and avoided races that way. While the patch was showing to improve allocation success rates at the time, the benefit is marginal given the relative complexity and it should be revisited from scratch in the context of the other reclaim-related changes that have taken place since the patch was first written and tested. This patch partially reverts commit 1fb3f8ca "mm: compaction: capture a suitable high-order page immediately when it is made available". Reported-and-tested-by: Eric Wong Tested-by: Eric Dumazet Cc: stable@vger.kernel.org Signed-off-by: Mel Gorman Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 4 +- include/linux/mm.h | 1 - mm/compaction.c | 92 +++++++--------------------------------------- mm/internal.h | 1 - mm/page_alloc.c | 35 ++++-------------- 5 files changed, 23 insertions(+), 110 deletions(-) (limited to 'include') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 6ecb6dc2f303..cc7bddeaf553 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write, extern int fragmentation_index(struct zone *zone, unsigned int order); extern unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask, - bool sync, bool *contended, struct page **page); + bool sync, bool *contended); extern int compact_pgdat(pg_data_t *pgdat, int order); extern void reset_isolation_suitable(pg_data_t *pgdat); extern unsigned long compaction_suitable(struct zone *zone, int order); @@ -75,7 +75,7 @@ static inline bool compaction_restarting(struct zone *zone, int order) #else static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask, - bool sync, bool *contended, struct page **page) + bool sync, bool *contended) { return COMPACT_CONTINUE; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 63204078f72b..66e2f7c61e5c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -455,7 +455,6 @@ void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); int split_free_page(struct page *page); -int capture_free_page(struct page *page, int alloc_order, int migratetype); /* * Compound pages have a destructor function. Provide a diff --git a/mm/compaction.c b/mm/compaction.c index 6b807e466497..2c570432aa56 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -816,6 +816,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, static int compact_finished(struct zone *zone, struct compact_control *cc) { + unsigned int order; unsigned long watermark; if (fatal_signal_pending(current)) @@ -850,22 +851,16 @@ static int compact_finished(struct zone *zone, return COMPACT_CONTINUE; /* Direct compactor: Is a suitable page free? */ - if (cc->page) { - /* Was a suitable page captured? */ - if (*cc->page) + for (order = cc->order; order < MAX_ORDER; order++) { + struct free_area *area = &zone->free_area[order]; + + /* Job done if page is free of the right migratetype */ + if (!list_empty(&area->free_list[cc->migratetype])) + return COMPACT_PARTIAL; + + /* Job done if allocation would set block type */ + if (cc->order >= pageblock_order && area->nr_free) return COMPACT_PARTIAL; - } else { - unsigned int order; - for (order = cc->order; order < MAX_ORDER; order++) { - struct free_area *area = &zone->free_area[cc->order]; - /* Job done if page is free of the right migratetype */ - if (!list_empty(&area->free_list[cc->migratetype])) - return COMPACT_PARTIAL; - - /* Job done if allocation would set block type */ - if (cc->order >= pageblock_order && area->nr_free) - return COMPACT_PARTIAL; - } } return COMPACT_CONTINUE; @@ -921,60 +916,6 @@ unsigned long compaction_suitable(struct zone *zone, int order) return COMPACT_CONTINUE; } -static void compact_capture_page(struct compact_control *cc) -{ - unsigned long flags; - int mtype, mtype_low, mtype_high; - - if (!cc->page || *cc->page) - return; - - /* - * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP - * regardless of the migratetype of the freelist is is captured from. - * This is fine because the order for a high-order MIGRATE_MOVABLE - * allocation is typically at least a pageblock size and overall - * fragmentation is not impaired. Other allocation types must - * capture pages from their own migratelist because otherwise they - * could pollute other pageblocks like MIGRATE_MOVABLE with - * difficult to move pages and making fragmentation worse overall. - */ - if (cc->migratetype == MIGRATE_MOVABLE) { - mtype_low = 0; - mtype_high = MIGRATE_PCPTYPES; - } else { - mtype_low = cc->migratetype; - mtype_high = cc->migratetype + 1; - } - - /* Speculatively examine the free lists without zone lock */ - for (mtype = mtype_low; mtype < mtype_high; mtype++) { - int order; - for (order = cc->order; order < MAX_ORDER; order++) { - struct page *page; - struct free_area *area; - area = &(cc->zone->free_area[order]); - if (list_empty(&area->free_list[mtype])) - continue; - - /* Take the lock and attempt capture of the page */ - if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc)) - return; - if (!list_empty(&area->free_list[mtype])) { - page = list_entry(area->free_list[mtype].next, - struct page, lru); - if (capture_free_page(page, cc->order, mtype)) { - spin_unlock_irqrestore(&cc->zone->lock, - flags); - *cc->page = page; - return; - } - } - spin_unlock_irqrestore(&cc->zone->lock, flags); - } - } -} - static int compact_zone(struct zone *zone, struct compact_control *cc) { int ret; @@ -1054,9 +995,6 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) goto out; } } - - /* Capture a page now if it is a suitable size */ - compact_capture_page(cc); } out: @@ -1069,8 +1007,7 @@ out: static unsigned long compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask, - bool sync, bool *contended, - struct page **page) + bool sync, bool *contended) { unsigned long ret; struct compact_control cc = { @@ -1080,7 +1017,6 @@ static unsigned long compact_zone_order(struct zone *zone, .migratetype = allocflags_to_migratetype(gfp_mask), .zone = zone, .sync = sync, - .page = page, }; INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); @@ -1110,7 +1046,7 @@ int sysctl_extfrag_threshold = 500; */ unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask, - bool sync, bool *contended, struct page **page) + bool sync, bool *contended) { enum zone_type high_zoneidx = gfp_zone(gfp_mask); int may_enter_fs = gfp_mask & __GFP_FS; @@ -1136,7 +1072,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, int status; status = compact_zone_order(zone, order, gfp_mask, sync, - contended, page); + contended); rc = max(status, rc); /* If a normal allocation would succeed, stop compacting */ @@ -1192,7 +1128,6 @@ int compact_pgdat(pg_data_t *pgdat, int order) struct compact_control cc = { .order = order, .sync = false, - .page = NULL, }; return __compact_pgdat(pgdat, &cc); @@ -1203,7 +1138,6 @@ static int compact_node(int nid) struct compact_control cc = { .order = -1, .sync = true, - .page = NULL, }; return __compact_pgdat(NODE_DATA(nid), &cc); diff --git a/mm/internal.h b/mm/internal.h index d597f94cc205..9ba21100ebf3 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -135,7 +135,6 @@ struct compact_control { int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; bool contended; /* True if a lock was contended */ - struct page **page; /* Page captured of requested size */ }; unsigned long diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bc6cc0e913bd..ece7b8e8a5b6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1384,14 +1384,8 @@ void split_page(struct page *page, unsigned int order) set_page_refcounted(page + i); } -/* - * Similar to the split_page family of functions except that the page - * required at the given order and being isolated now to prevent races - * with parallel allocators - */ -int capture_free_page(struct page *page, int alloc_order, int migratetype) +static int __isolate_free_page(struct page *page, unsigned int order) { - unsigned int order; unsigned long watermark; struct zone *zone; int mt; @@ -1399,7 +1393,6 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) BUG_ON(!PageBuddy(page)); zone = page_zone(page); - order = page_order(page); mt = get_pageblock_migratetype(page); if (mt != MIGRATE_ISOLATE) { @@ -1408,7 +1401,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) return 0; - __mod_zone_freepage_state(zone, -(1UL << alloc_order), mt); + __mod_zone_freepage_state(zone, -(1UL << order), mt); } /* Remove page from free list */ @@ -1416,11 +1409,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) zone->free_area[order].nr_free--; rmv_page_order(page); - if (alloc_order != order) - expand(zone, page, alloc_order, order, - &zone->free_area[order], migratetype); - - /* Set the pageblock if the captured page is at least a pageblock */ + /* Set the pageblock if the isolated page is at least a pageblock */ if (order >= pageblock_order - 1) { struct page *endpage = page + (1 << order) - 1; for (; page < endpage; page += pageblock_nr_pages) { @@ -1431,7 +1420,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) } } - return 1UL << alloc_order; + return 1UL << order; } /* @@ -1449,10 +1438,9 @@ int split_free_page(struct page *page) unsigned int order; int nr_pages; - BUG_ON(!PageBuddy(page)); order = page_order(page); - nr_pages = capture_free_page(page, order, 0); + nr_pages = __isolate_free_page(page, order); if (!nr_pages) return 0; @@ -2136,8 +2124,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, bool *contended_compaction, bool *deferred_compaction, unsigned long *did_some_progress) { - struct page *page = NULL; - if (!order) return NULL; @@ -2149,16 +2135,12 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, current->flags |= PF_MEMALLOC; *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, nodemask, sync_migration, - contended_compaction, &page); + contended_compaction); current->flags &= ~PF_MEMALLOC; - /* If compaction captured a page, prep and use it */ - if (page) { - prep_new_page(page, order, gfp_mask); - goto got_page; - } - if (*did_some_progress != COMPACT_SKIPPED) { + struct page *page; + /* Page migration frees to the PCP lists but we want merging */ drain_pages(get_cpu()); put_cpu(); @@ -2168,7 +2150,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, alloc_flags & ~ALLOC_NO_WATERMARKS, preferred_zone, migratetype); if (page) { -got_page: preferred_zone->compact_blockskip_flush = false; preferred_zone->compact_considered = 0; preferred_zone->compact_defer_shift = 0; -- cgit v1.2.3 From 896f97ea95c1d29c0520ee0766b66b7f64cb967c Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Fri, 11 Jan 2013 14:31:36 -0800 Subject: lib: cpu_rmap: avoid flushing all workqueues In some cases, free_irq_cpu_rmap() is called while holding a lock (eg rtnl). This can lead to deadlocks, because it invokes flush_scheduled_work() which ends up waiting for whole system workqueue to flush, but some pending works might try to acquire the lock we are already holding. This commit uses reference-counting to replace irq_run_affinity_notifiers(). It also removes irq_run_affinity_notifiers() altogether. [akpm@linux-foundation.org: eliminate free_cpu_rmap, rename cpu_rmap_reclaim() to cpu_rmap_release(), propagate kref_put() retval from cpu_rmap_put()] Signed-off-by: David Decotigny Reviewed-by: Ben Hutchings Acked-by: Eric Dumazet Reviewed-by: Josh Triplett Cc: "David S. Miller" Cc: Or Gerlitz Acked-by: Amir Vadai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpu_rmap.h | 13 ++++-------- include/linux/interrupt.h | 5 ----- lib/cpu_rmap.c | 54 ++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 53 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h index ac3bbb5b9502..1739510d8994 100644 --- a/include/linux/cpu_rmap.h +++ b/include/linux/cpu_rmap.h @@ -13,9 +13,11 @@ #include #include #include +#include /** * struct cpu_rmap - CPU affinity reverse-map + * @refcount: kref for object * @size: Number of objects to be reverse-mapped * @used: Number of objects added * @obj: Pointer to array of object pointers @@ -23,6 +25,7 @@ * based on affinity masks */ struct cpu_rmap { + struct kref refcount; u16 size, used; void **obj; struct { @@ -33,15 +36,7 @@ struct cpu_rmap { #define CPU_RMAP_DIST_INF 0xffff extern struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags); - -/** - * free_cpu_rmap - free CPU affinity reverse-map - * @rmap: Reverse-map allocated with alloc_cpu_rmap(), or %NULL - */ -static inline void free_cpu_rmap(struct cpu_rmap *rmap) -{ - kfree(rmap); -} +extern int cpu_rmap_put(struct cpu_rmap *rmap); extern int cpu_rmap_add(struct cpu_rmap *rmap, void *obj); extern int cpu_rmap_update(struct cpu_rmap *rmap, u16 index, diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5e4e6170f43a..5fa5afeeb759 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -268,11 +268,6 @@ struct irq_affinity_notify { extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); -static inline void irq_run_affinity_notifiers(void) -{ - flush_scheduled_work(); -} - #else /* CONFIG_SMP */ static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c index 145dec5267c9..5fbed5caba6e 100644 --- a/lib/cpu_rmap.c +++ b/lib/cpu_rmap.c @@ -45,6 +45,7 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags) if (!rmap) return NULL; + kref_init(&rmap->refcount); rmap->obj = (void **)((char *)rmap + obj_offset); /* Initially assign CPUs to objects on a rota, since we have @@ -63,6 +64,35 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags) } EXPORT_SYMBOL(alloc_cpu_rmap); +/** + * cpu_rmap_release - internal reclaiming helper called from kref_put + * @ref: kref to struct cpu_rmap + */ +static void cpu_rmap_release(struct kref *ref) +{ + struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount); + kfree(rmap); +} + +/** + * cpu_rmap_get - internal helper to get new ref on a cpu_rmap + * @rmap: reverse-map allocated with alloc_cpu_rmap() + */ +static inline void cpu_rmap_get(struct cpu_rmap *rmap) +{ + kref_get(&rmap->refcount); +} + +/** + * cpu_rmap_put - release ref on a cpu_rmap + * @rmap: reverse-map allocated with alloc_cpu_rmap() + */ +int cpu_rmap_put(struct cpu_rmap *rmap) +{ + return kref_put(&rmap->refcount, cpu_rmap_release); +} +EXPORT_SYMBOL(cpu_rmap_put); + /* Reevaluate nearest object for given CPU, comparing with the given * neighbours at the given distance. */ @@ -197,8 +227,7 @@ struct irq_glue { * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs * @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL * - * Must be called in process context, before freeing the IRQs, and - * without holding any locks required by global workqueue items. + * Must be called in process context, before freeing the IRQs. */ void free_irq_cpu_rmap(struct cpu_rmap *rmap) { @@ -212,12 +241,18 @@ void free_irq_cpu_rmap(struct cpu_rmap *rmap) glue = rmap->obj[index]; irq_set_affinity_notifier(glue->notify.irq, NULL); } - irq_run_affinity_notifiers(); - kfree(rmap); + cpu_rmap_put(rmap); } EXPORT_SYMBOL(free_irq_cpu_rmap); +/** + * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated + * @notify: struct irq_affinity_notify passed by irq/manage.c + * @mask: cpu mask for new SMP affinity + * + * This is executed in workqueue context. + */ static void irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask) { @@ -230,10 +265,16 @@ irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask) pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc); } +/** + * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem + * @ref: kref to struct irq_affinity_notify passed by irq/manage.c + */ static void irq_cpu_rmap_release(struct kref *ref) { struct irq_glue *glue = container_of(ref, struct irq_glue, notify.kref); + + cpu_rmap_put(glue->rmap); kfree(glue); } @@ -258,10 +299,13 @@ int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq) glue->notify.notify = irq_cpu_rmap_notify; glue->notify.release = irq_cpu_rmap_release; glue->rmap = rmap; + cpu_rmap_get(rmap); glue->index = cpu_rmap_add(rmap, glue); rc = irq_set_affinity_notifier(irq, &glue->notify); - if (rc) + if (rc) { + cpu_rmap_put(glue->rmap); kfree(glue); + } return rc; } EXPORT_SYMBOL(irq_cpu_rmap_add); -- cgit v1.2.3 From 1b963c81b14509e330e0fe3218b645ece2738dc5 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 11 Jan 2013 14:31:56 -0800 Subject: lockdep, rwsem: provide down_write_nest_lock() down_write_nest_lock() provides a means to annotate locking scenario where an outer lock is guaranteed to serialize the order nested locks are being acquired. This is analogoue to already existing mutex_lock_nest_lock() and spin_lock_nest_lock(). Signed-off-by: Jiri Kosina Cc: Rik van Riel Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Mel Gorman Tested-by: Sedat Dilek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockdep.h | 3 +++ include/linux/rwsem.h | 9 +++++++++ kernel/rwsem.c | 10 ++++++++++ 3 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 00e46376e28f..2bca44b0893c 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -524,14 +524,17 @@ static inline void print_irqtrace_events(struct task_struct *curr) #ifdef CONFIG_DEBUG_LOCK_ALLOC # ifdef CONFIG_PROVE_LOCKING # define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, NULL, i) +# define rwsem_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i) # define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 2, NULL, i) # else # define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, NULL, i) +# define rwsem_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) # define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 1, NULL, i) # endif # define rwsem_release(l, n, i) lock_release(l, n, i) #else # define rwsem_acquire(l, s, t, i) do { } while (0) +# define rwsem_acquire_nest(l, s, t, n, i) do { } while (0) # define rwsem_acquire_read(l, s, t, i) do { } while (0) # define rwsem_release(l, n, i) do { } while (0) #endif diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 54bd7cd7ecbd..413cc11e414a 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -125,8 +125,17 @@ extern void downgrade_write(struct rw_semaphore *sem); */ extern void down_read_nested(struct rw_semaphore *sem, int subclass); extern void down_write_nested(struct rw_semaphore *sem, int subclass); +extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock); + +# define down_write_nest_lock(sem, nest_lock) \ +do { \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ + _down_write_nest_lock(sem, &(nest_lock)->dep_map); \ +} while (0); + #else # define down_read_nested(sem, subclass) down_read(sem) +# define down_write_nest_lock(sem, nest_lock) down_read(sem) # define down_write_nested(sem, subclass) down_write(sem) #endif diff --git a/kernel/rwsem.c b/kernel/rwsem.c index 6850f53e02d8..b3c6c3fcd847 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c @@ -116,6 +116,16 @@ void down_read_nested(struct rw_semaphore *sem, int subclass) EXPORT_SYMBOL(down_read_nested); +void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) +{ + might_sleep(); + rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); + + LOCK_CONTENDED(sem, __down_write_trylock, __down_write); +} + +EXPORT_SYMBOL(_down_write_nest_lock); + void down_write_nested(struct rw_semaphore *sem, int subclass) { might_sleep(); -- cgit v1.2.3 From 7b9205bd775afc4439ed86d617f9042ee9e76a71 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Jan 2013 14:32:05 -0800 Subject: audit: create explicit AUDIT_SECCOMP event type The seccomp path was using AUDIT_ANOM_ABEND from when seccomp mode 1 could only kill a process. While we still want to make sure an audit record is forced on a kill, this should use a separate record type since seccomp mode 2 introduces other behaviors. In the case of "handled" behaviors (process wasn't killed), only emit a record if the process is under inspection. This change also fixes userspace examination of seccomp audit events, since it was considered malformed due to missing fields of the AUDIT_ANOM_ABEND event type. Signed-off-by: Kees Cook Cc: Al Viro Cc: Eric Paris Cc: Jeff Layton Cc: "Eric W. Biederman" Cc: Julien Tinnes Acked-by: Will Drewry Acked-by: Steve Grubb Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/audit.h | 3 ++- include/uapi/linux/audit.h | 1 + kernel/auditsc.c | 14 +++++++++++--- 3 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index bce729afbcf9..9d5104d7aba9 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -157,7 +157,8 @@ void audit_core_dumps(long signr); static inline void audit_seccomp(unsigned long syscall, long signr, int code) { - if (unlikely(!audit_dummy_context())) + /* Force a record to be reported if a signal was delivered. */ + if (signr || unlikely(!audit_dummy_context())) __audit_seccomp(syscall, signr, code); } diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 76352ac45f24..09a2d94ab113 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -106,6 +106,7 @@ #define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */ #define AUDIT_NETFILTER_PKT 1324 /* Packets traversing netfilter chains */ #define AUDIT_NETFILTER_CFG 1325 /* Netfilter chain modifications */ +#define AUDIT_SECCOMP 1326 /* Secure Computing event */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index e37e6a12c5e3..3e46d1dec613 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2675,7 +2675,7 @@ void __audit_mmap_fd(int fd, int flags) context->type = AUDIT_MMAP; } -static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) +static void audit_log_task(struct audit_buffer *ab) { kuid_t auid, uid; kgid_t gid; @@ -2693,6 +2693,11 @@ static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) audit_log_task_context(ab); audit_log_format(ab, " pid=%d comm=", current->pid); audit_log_untrustedstring(ab, current->comm); +} + +static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) +{ + audit_log_task(ab); audit_log_format(ab, " reason="); audit_log_string(ab, reason); audit_log_format(ab, " sig=%ld", signr); @@ -2723,8 +2728,11 @@ void __audit_seccomp(unsigned long syscall, long signr, int code) { struct audit_buffer *ab; - ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND); - audit_log_abend(ab, "seccomp", signr); + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_SECCOMP); + if (unlikely(!ab)) + return; + audit_log_task(ab); + audit_log_format(ab, " sig=%ld", signr); audit_log_format(ab, " syscall=%ld", syscall); audit_log_format(ab, " compat=%d", is_compat_task()); audit_log_format(ab, " ip=0x%lx", KSTK_EIP(current)); -- cgit v1.2.3 From c0a3a20b6c4b5229ef5d26fd9b1c4b1957632aa7 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Fri, 11 Jan 2013 14:32:13 -0800 Subject: linux/audit.h: move ptrace.h include to kernel header While the kernel internals want pt_regs (and so it includes linux/ptrace.h), the user version of audit.h does not need it. So move the include out of the uapi version. This avoids issues where people want the audit defines and userland ptrace api. Including both the kernel ptrace and the userland ptrace headers can easily lead to failure. Signed-off-by: Mike Frysinger Cc: Eric Paris Cc: Al Viro Reviewed-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/audit.h | 1 + include/uapi/linux/audit.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index 9d5104d7aba9..5a6d718adf34 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -24,6 +24,7 @@ #define _LINUX_AUDIT_H_ #include +#include #include struct audit_sig_info { diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 09a2d94ab113..9f096f1c0907 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -26,7 +26,6 @@ #include #include -#include /* The netlink messages for the audit system is divided into blocks: * 1000 - 1099 are for commanding the audit system -- cgit v1.2.3 From 8fb74b9fb2b182d54beee592350d9ea1f325917a Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 11 Jan 2013 14:32:16 -0800 Subject: mm: compaction: partially revert capture of suitable high-order page Eric Wong reported on 3.7 and 3.8-rc2 that ppoll() got stuck when waiting for POLLIN on a local TCP socket. It was easier to trigger if there was disk IO and dirty pages at the same time and he bisected it to commit 1fb3f8ca0e92 ("mm: compaction: capture a suitable high-order page immediately when it is made available"). The intention of that patch was to improve high-order allocations under memory pressure after changes made to reclaim in 3.6 drastically hurt THP allocations but the approach was flawed. For Eric, the problem was that page->pfmemalloc was not being cleared for captured pages leading to a poor interaction with swap-over-NFS support causing the packets to be dropped. However, I identified a few more problems with the patch including the fact that it can increase contention on zone->lock in some cases which could result in async direct compaction being aborted early. In retrospect the capture patch took the wrong approach. What it should have done is mark the pageblock being migrated as MIGRATE_ISOLATE if it was allocating for THP and avoided races that way. While the patch was showing to improve allocation success rates at the time, the benefit is marginal given the relative complexity and it should be revisited from scratch in the context of the other reclaim-related changes that have taken place since the patch was first written and tested. This patch partially reverts commit 1fb3f8ca0e92 ("mm: compaction: capture a suitable high-order page immediately when it is made available"). Reported-and-tested-by: Eric Wong Tested-by: Eric Dumazet Cc: Signed-off-by: Mel Gorman Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 4 +- include/linux/mm.h | 1 - mm/compaction.c | 92 +++++++--------------------------------------- mm/internal.h | 1 - mm/page_alloc.c | 35 ++++-------------- 5 files changed, 23 insertions(+), 110 deletions(-) (limited to 'include') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 6ecb6dc2f303..cc7bddeaf553 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write, extern int fragmentation_index(struct zone *zone, unsigned int order); extern unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask, - bool sync, bool *contended, struct page **page); + bool sync, bool *contended); extern int compact_pgdat(pg_data_t *pgdat, int order); extern void reset_isolation_suitable(pg_data_t *pgdat); extern unsigned long compaction_suitable(struct zone *zone, int order); @@ -75,7 +75,7 @@ static inline bool compaction_restarting(struct zone *zone, int order) #else static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask, - bool sync, bool *contended, struct page **page) + bool sync, bool *contended) { return COMPACT_CONTINUE; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 63204078f72b..66e2f7c61e5c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -455,7 +455,6 @@ void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); int split_free_page(struct page *page); -int capture_free_page(struct page *page, int alloc_order, int migratetype); /* * Compound pages have a destructor function. Provide a diff --git a/mm/compaction.c b/mm/compaction.c index f8f5c111b7d7..c62bd063d766 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -816,6 +816,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, static int compact_finished(struct zone *zone, struct compact_control *cc) { + unsigned int order; unsigned long watermark; if (fatal_signal_pending(current)) @@ -850,22 +851,16 @@ static int compact_finished(struct zone *zone, return COMPACT_CONTINUE; /* Direct compactor: Is a suitable page free? */ - if (cc->page) { - /* Was a suitable page captured? */ - if (*cc->page) + for (order = cc->order; order < MAX_ORDER; order++) { + struct free_area *area = &zone->free_area[order]; + + /* Job done if page is free of the right migratetype */ + if (!list_empty(&area->free_list[cc->migratetype])) + return COMPACT_PARTIAL; + + /* Job done if allocation would set block type */ + if (cc->order >= pageblock_order && area->nr_free) return COMPACT_PARTIAL; - } else { - unsigned int order; - for (order = cc->order; order < MAX_ORDER; order++) { - struct free_area *area = &zone->free_area[cc->order]; - /* Job done if page is free of the right migratetype */ - if (!list_empty(&area->free_list[cc->migratetype])) - return COMPACT_PARTIAL; - - /* Job done if allocation would set block type */ - if (cc->order >= pageblock_order && area->nr_free) - return COMPACT_PARTIAL; - } } return COMPACT_CONTINUE; @@ -921,60 +916,6 @@ unsigned long compaction_suitable(struct zone *zone, int order) return COMPACT_CONTINUE; } -static void compact_capture_page(struct compact_control *cc) -{ - unsigned long flags; - int mtype, mtype_low, mtype_high; - - if (!cc->page || *cc->page) - return; - - /* - * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP - * regardless of the migratetype of the freelist is is captured from. - * This is fine because the order for a high-order MIGRATE_MOVABLE - * allocation is typically at least a pageblock size and overall - * fragmentation is not impaired. Other allocation types must - * capture pages from their own migratelist because otherwise they - * could pollute other pageblocks like MIGRATE_MOVABLE with - * difficult to move pages and making fragmentation worse overall. - */ - if (cc->migratetype == MIGRATE_MOVABLE) { - mtype_low = 0; - mtype_high = MIGRATE_PCPTYPES; - } else { - mtype_low = cc->migratetype; - mtype_high = cc->migratetype + 1; - } - - /* Speculatively examine the free lists without zone lock */ - for (mtype = mtype_low; mtype < mtype_high; mtype++) { - int order; - for (order = cc->order; order < MAX_ORDER; order++) { - struct page *page; - struct free_area *area; - area = &(cc->zone->free_area[order]); - if (list_empty(&area->free_list[mtype])) - continue; - - /* Take the lock and attempt capture of the page */ - if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc)) - return; - if (!list_empty(&area->free_list[mtype])) { - page = list_entry(area->free_list[mtype].next, - struct page, lru); - if (capture_free_page(page, cc->order, mtype)) { - spin_unlock_irqrestore(&cc->zone->lock, - flags); - *cc->page = page; - return; - } - } - spin_unlock_irqrestore(&cc->zone->lock, flags); - } - } -} - static int compact_zone(struct zone *zone, struct compact_control *cc) { int ret; @@ -1054,9 +995,6 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) goto out; } } - - /* Capture a page now if it is a suitable size */ - compact_capture_page(cc); } out: @@ -1069,8 +1007,7 @@ out: static unsigned long compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask, - bool sync, bool *contended, - struct page **page) + bool sync, bool *contended) { unsigned long ret; struct compact_control cc = { @@ -1080,7 +1017,6 @@ static unsigned long compact_zone_order(struct zone *zone, .migratetype = allocflags_to_migratetype(gfp_mask), .zone = zone, .sync = sync, - .page = page, }; INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); @@ -1110,7 +1046,7 @@ int sysctl_extfrag_threshold = 500; */ unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask, - bool sync, bool *contended, struct page **page) + bool sync, bool *contended) { enum zone_type high_zoneidx = gfp_zone(gfp_mask); int may_enter_fs = gfp_mask & __GFP_FS; @@ -1136,7 +1072,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, int status; status = compact_zone_order(zone, order, gfp_mask, sync, - contended, page); + contended); rc = max(status, rc); /* If a normal allocation would succeed, stop compacting */ @@ -1192,7 +1128,6 @@ int compact_pgdat(pg_data_t *pgdat, int order) struct compact_control cc = { .order = order, .sync = false, - .page = NULL, }; return __compact_pgdat(pgdat, &cc); @@ -1203,7 +1138,6 @@ static int compact_node(int nid) struct compact_control cc = { .order = -1, .sync = true, - .page = NULL, }; return __compact_pgdat(NODE_DATA(nid), &cc); diff --git a/mm/internal.h b/mm/internal.h index d597f94cc205..9ba21100ebf3 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -135,7 +135,6 @@ struct compact_control { int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; bool contended; /* True if a lock was contended */ - struct page **page; /* Page captured of requested size */ }; unsigned long diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c957805a7f0e..df2022ff0c8a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1384,14 +1384,8 @@ void split_page(struct page *page, unsigned int order) set_page_refcounted(page + i); } -/* - * Similar to the split_page family of functions except that the page - * required at the given order and being isolated now to prevent races - * with parallel allocators - */ -int capture_free_page(struct page *page, int alloc_order, int migratetype) +static int __isolate_free_page(struct page *page, unsigned int order) { - unsigned int order; unsigned long watermark; struct zone *zone; int mt; @@ -1399,7 +1393,6 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) BUG_ON(!PageBuddy(page)); zone = page_zone(page); - order = page_order(page); mt = get_pageblock_migratetype(page); if (mt != MIGRATE_ISOLATE) { @@ -1408,7 +1401,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) return 0; - __mod_zone_freepage_state(zone, -(1UL << alloc_order), mt); + __mod_zone_freepage_state(zone, -(1UL << order), mt); } /* Remove page from free list */ @@ -1416,11 +1409,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) zone->free_area[order].nr_free--; rmv_page_order(page); - if (alloc_order != order) - expand(zone, page, alloc_order, order, - &zone->free_area[order], migratetype); - - /* Set the pageblock if the captured page is at least a pageblock */ + /* Set the pageblock if the isolated page is at least a pageblock */ if (order >= pageblock_order - 1) { struct page *endpage = page + (1 << order) - 1; for (; page < endpage; page += pageblock_nr_pages) { @@ -1431,7 +1420,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) } } - return 1UL << alloc_order; + return 1UL << order; } /* @@ -1449,10 +1438,9 @@ int split_free_page(struct page *page) unsigned int order; int nr_pages; - BUG_ON(!PageBuddy(page)); order = page_order(page); - nr_pages = capture_free_page(page, order, 0); + nr_pages = __isolate_free_page(page, order); if (!nr_pages) return 0; @@ -2136,8 +2124,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, bool *contended_compaction, bool *deferred_compaction, unsigned long *did_some_progress) { - struct page *page = NULL; - if (!order) return NULL; @@ -2149,16 +2135,12 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, current->flags |= PF_MEMALLOC; *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, nodemask, sync_migration, - contended_compaction, &page); + contended_compaction); current->flags &= ~PF_MEMALLOC; - /* If compaction captured a page, prep and use it */ - if (page) { - prep_new_page(page, order, gfp_mask); - goto got_page; - } - if (*did_some_progress != COMPACT_SKIPPED) { + struct page *page; + /* Page migration frees to the PCP lists but we want merging */ drain_pages(get_cpu()); put_cpu(); @@ -2168,7 +2150,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, alloc_flags & ~ALLOC_NO_WATERMARKS, preferred_zone, migratetype); if (page) { -got_page: preferred_zone->compact_blockskip_flush = false; preferred_zone->compact_considered = 0; preferred_zone->compact_defer_shift = 0; -- cgit v1.2.3 From 3cb7a56344ca45ee56d71c5f8fe9f922306bff1f Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 11 Jan 2013 14:32:20 -0800 Subject: lib/rbtree.c: avoid the use of non-static __always_inline lib/rbtree.c declared __rb_erase_color() as __always_inline void, and then exported it with EXPORT_SYMBOL. This was because __rb_erase_color() must be exported for augmented rbtree users, but it must also be inlined into rb_erase() so that the dummy callback can get optimized out of that call site. (Actually with a modern compiler, none of the dummy callback functions should even be generated as separate text functions). The above usage is legal C, but it was unusual enough for some compilers to warn about it. This change makes things more explicit, with a static __always_inline ____rb_erase_color function for use in rb_erase(), and a separate non-inline __rb_erase_color function for use in rb_erase_augmented call sites. Signed-off-by: Michel Lespinasse Reported-by: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree_augmented.h | 14 +++++++++++--- lib/rbtree.c | 20 +++++++++++++++++--- 2 files changed, 28 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 2ac60c9cf644..fea49b5da12a 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -123,9 +123,9 @@ __rb_change_child(struct rb_node *old, struct rb_node *new, extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); -static __always_inline void -rb_erase_augmented(struct rb_node *node, struct rb_root *root, - const struct rb_augment_callbacks *augment) +static __always_inline struct rb_node * +__rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) { struct rb_node *child = node->rb_right, *tmp = node->rb_left; struct rb_node *parent, *rebalance; @@ -217,6 +217,14 @@ rb_erase_augmented(struct rb_node *node, struct rb_root *root, } augment->propagate(tmp, NULL); + return rebalance; +} + +static __always_inline void +rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); if (rebalance) __rb_erase_color(rebalance, root, augment->rotate); } diff --git a/lib/rbtree.c b/lib/rbtree.c index 4f56a11d67fa..c0e31fe2fabf 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -194,8 +194,12 @@ __rb_insert(struct rb_node *node, struct rb_root *root, } } -__always_inline void -__rb_erase_color(struct rb_node *parent, struct rb_root *root, +/* + * Inline version for rb_erase() use - we want to be able to inline + * and eliminate the dummy_rotate callback there + */ +static __always_inline void +____rb_erase_color(struct rb_node *parent, struct rb_root *root, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) { struct rb_node *node = NULL, *sibling, *tmp1, *tmp2; @@ -355,6 +359,13 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root, } } } + +/* Non-inline version for rb_erase_augmented() use */ +void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + ____rb_erase_color(parent, root, augment_rotate); +} EXPORT_SYMBOL(__rb_erase_color); /* @@ -380,7 +391,10 @@ EXPORT_SYMBOL(rb_insert_color); void rb_erase(struct rb_node *node, struct rb_root *root) { - rb_erase_augmented(node, root, &dummy_callbacks); + struct rb_node *rebalance; + rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); + if (rebalance) + ____rb_erase_color(rebalance, root, dummy_rotate); } EXPORT_SYMBOL(rb_erase); -- cgit v1.2.3 From d07d7507bfb4e23735c9b83e397c43e1e8a173e8 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 10 Jan 2013 23:19:10 +0000 Subject: net, wireless: overwrite default_ethtool_ops Since: commit 2c60db037034d27f8c636403355d52872da92f81 Author: Eric Dumazet Date: Sun Sep 16 09:17:26 2012 +0000 net: provide a default dev->ethtool_ops wireless core does not correctly assign ethtool_ops. After alloc_netdev*() call, some cfg80211 drivers provide they own ethtool_ops, but some do not. For them, wireless core provide generic cfg80211_ethtool_ops, which is assigned in NETDEV_REGISTER notify call: if (!dev->ethtool_ops) dev->ethtool_ops = &cfg80211_ethtool_ops; But after Eric's commit, dev->ethtool_ops is no longer NULL (on cfg80211 drivers without custom ethtool_ops), but points to &default_ethtool_ops. In order to fix the problem, provide function which will overwrite default_ethtool_ops and use it by wireless core. Signed-off-by: Stanislaw Gruszka Acked-by: Johannes Berg Acked-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ net/core/dev.c | 8 ++++++++ net/wireless/core.c | 3 +-- 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c599e4782d45..9ef07d0868b6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -60,6 +60,9 @@ struct wireless_dev; #define SET_ETHTOOL_OPS(netdev,ops) \ ( (netdev)->ethtool_ops = (ops) ) +extern void netdev_set_default_ethtool_ops(struct net_device *dev, + const struct ethtool_ops *ops); + /* hardware address assignment types */ #define NET_ADDR_PERM 0 /* address is permanent (default) */ #define NET_ADDR_RANDOM 1 /* address is generated randomly */ diff --git a/net/core/dev.c b/net/core/dev.c index 515473ee52cb..f64e439b4a00 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6121,6 +6121,14 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) static const struct ethtool_ops default_ethtool_ops; +void netdev_set_default_ethtool_ops(struct net_device *dev, + const struct ethtool_ops *ops) +{ + if (dev->ethtool_ops == &default_ethtool_ops) + dev->ethtool_ops = ops; +} +EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); + /** * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for diff --git a/net/wireless/core.c b/net/wireless/core.c index 14d990400354..b677eab55b68 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -866,8 +866,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, /* allow mac80211 to determine the timeout */ wdev->ps_timeout = -1; - if (!dev->ethtool_ops) - dev->ethtool_ops = &cfg80211_ethtool_ops; + netdev_set_default_ethtool_ops(dev, &cfg80211_ethtool_ops); if ((wdev->iftype == NL80211_IFTYPE_STATION || wdev->iftype == NL80211_IFTYPE_P2P_CLIENT || -- cgit v1.2.3 From 0d21b0e3477395e7ff2acc269f15df6e6a8d356d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 12 Jan 2013 11:38:44 +1030 Subject: module: add new state MODULE_STATE_UNFORMED. You should never look at such a module, so it's excised from all paths which traverse the modules list. We add the state at the end, to avoid gratuitous ABI break (ksplice). Signed-off-by: Rusty Russell --- include/linux/module.h | 10 ++++---- kernel/debug/kdb/kdb_main.c | 2 ++ kernel/module.c | 57 +++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 59 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 7760c6d344a3..1375ee3f03aa 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -199,11 +199,11 @@ struct module_use { struct module *source, *target; }; -enum module_state -{ - MODULE_STATE_LIVE, - MODULE_STATE_COMING, - MODULE_STATE_GOING, +enum module_state { + MODULE_STATE_LIVE, /* Normal state. */ + MODULE_STATE_COMING, /* Full formed, running module_init. */ + MODULE_STATE_GOING, /* Going away. */ + MODULE_STATE_UNFORMED, /* Still setting it up. */ }; /** diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 4d5f8d5612f3..8875254120b6 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1970,6 +1970,8 @@ static int kdb_lsmod(int argc, const char **argv) kdb_printf("Module Size modstruct Used by\n"); list_for_each_entry(mod, kdb_modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; kdb_printf("%-20s%8u 0x%p ", mod->name, mod->core_size, (void *)mod); diff --git a/kernel/module.c b/kernel/module.c index 41bc1189b061..c3a2ee8e3679 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -188,6 +188,7 @@ struct load_info { ongoing or failed initialization etc. */ static inline int strong_try_module_get(struct module *mod) { + BUG_ON(mod && mod->state == MODULE_STATE_UNFORMED); if (mod && mod->state == MODULE_STATE_COMING) return -EBUSY; if (try_module_get(mod)) @@ -343,6 +344,9 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr, #endif }; + if (mod->state == MODULE_STATE_UNFORMED) + continue; + if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data)) return true; } @@ -450,16 +454,24 @@ const struct kernel_symbol *find_symbol(const char *name, EXPORT_SYMBOL_GPL(find_symbol); /* Search for module by name: must hold module_mutex. */ -struct module *find_module(const char *name) +static struct module *find_module_all(const char *name, + bool even_unformed) { struct module *mod; list_for_each_entry(mod, &modules, list) { + if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) + continue; if (strcmp(mod->name, name) == 0) return mod; } return NULL; } + +struct module *find_module(const char *name) +{ + return find_module_all(name, false); +} EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP @@ -525,6 +537,8 @@ bool is_module_percpu_address(unsigned long addr) preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if (!mod->percpu_size) continue; for_each_possible_cpu(cpu) { @@ -1048,6 +1062,8 @@ static ssize_t show_initstate(struct module_attribute *mattr, case MODULE_STATE_GOING: state = "going"; break; + default: + BUG(); } return sprintf(buffer, "%s\n", state); } @@ -1786,6 +1802,8 @@ void set_all_modules_text_rw(void) mutex_lock(&module_mutex); list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if ((mod->module_core) && (mod->core_text_size)) { set_page_attributes(mod->module_core, mod->module_core + mod->core_text_size, @@ -1807,6 +1825,8 @@ void set_all_modules_text_ro(void) mutex_lock(&module_mutex); list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if ((mod->module_core) && (mod->core_text_size)) { set_page_attributes(mod->module_core, mod->module_core + mod->core_text_size, @@ -2998,7 +3018,8 @@ static bool finished_loading(const char *name) mutex_lock(&module_mutex); mod = find_module(name); - ret = !mod || mod->state != MODULE_STATE_COMING; + ret = !mod || mod->state == MODULE_STATE_LIVE + || mod->state == MODULE_STATE_GOING; mutex_unlock(&module_mutex); return ret; @@ -3361,6 +3382,8 @@ const char *module_address_lookup(unsigned long addr, preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if (within_module_init(addr, mod) || within_module_core(addr, mod)) { if (modname) @@ -3384,6 +3407,8 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if (within_module_init(addr, mod) || within_module_core(addr, mod)) { const char *sym; @@ -3408,6 +3433,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if (within_module_init(addr, mod) || within_module_core(addr, mod)) { const char *sym; @@ -3435,6 +3462,8 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if (symnum < mod->num_symtab) { *value = mod->symtab[symnum].st_value; *type = mod->symtab[symnum].st_info; @@ -3477,9 +3506,12 @@ unsigned long module_kallsyms_lookup_name(const char *name) ret = mod_find_symname(mod, colon+1); *colon = ':'; } else { - list_for_each_entry_rcu(mod, &modules, list) + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if ((ret = mod_find_symname(mod, name)) != 0) break; + } } preempt_enable(); return ret; @@ -3494,6 +3526,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, int ret; list_for_each_entry(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; for (i = 0; i < mod->num_symtab; i++) { ret = fn(data, mod->strtab + mod->symtab[i].st_name, mod, mod->symtab[i].st_value); @@ -3509,6 +3543,7 @@ static char *module_flags(struct module *mod, char *buf) { int bx = 0; + BUG_ON(mod->state == MODULE_STATE_UNFORMED); if (mod->taints || mod->state == MODULE_STATE_GOING || mod->state == MODULE_STATE_COMING) { @@ -3550,6 +3585,10 @@ static int m_show(struct seq_file *m, void *p) struct module *mod = list_entry(p, struct module, list); char buf[8]; + /* We always ignore unformed modules. */ + if (mod->state == MODULE_STATE_UNFORMED) + return 0; + seq_printf(m, "%s %u", mod->name, mod->init_size + mod->core_size); print_unload_info(m, mod); @@ -3610,6 +3649,8 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if (mod->num_exentries == 0) continue; @@ -3658,10 +3699,13 @@ struct module *__module_address(unsigned long addr) if (addr < module_addr_min || addr > module_addr_max) return NULL; - list_for_each_entry_rcu(mod, &modules, list) + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if (within_module_core(addr, mod) || within_module_init(addr, mod)) return mod; + } return NULL; } EXPORT_SYMBOL_GPL(__module_address); @@ -3714,8 +3758,11 @@ void print_modules(void) printk(KERN_DEFAULT "Modules linked in:"); /* Most callers should already have preempt disabled, but make sure */ preempt_disable(); - list_for_each_entry_rcu(mod, &modules, list) + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; printk(" %s%s", mod->name, module_flags(mod, buf)); + } preempt_enable(); if (last_unloaded_module[0]) printk(" [last unloaded: %s]", last_unloaded_module); -- cgit v1.2.3 From 803739d25c2343da6d2f95eebdcbc08bf67097d4 Mon Sep 17 00:00:00 2001 From: Shane Huang Date: Mon, 17 Dec 2012 23:18:59 +0800 Subject: [libata] replace sata_settings with devslp_timing NCQ capability was used to check availability of SATA Settings page from Identify Device Data Log, which contains DevSlp timing variables. It does not work on some HDDs and leads to error messages. IDENTIFY word 78 bit 5(Hardware Feature Control) can't work either because it is only the sufficient condition of Identify Device data log, not the necessary condition. This patch replaced ata_device->sata_settings with ->devslp_timing to only save DevSlp timing variables(8 bytes), instead of the whole SATA Settings page(512 bytes). Addresses https://bugzilla.kernel.org/show_bug.cgi?id=51881 Reported-by: Borislav Petkov Signed-off-by: Shane Huang Cc: stable@vger.kernel.org Signed-off-by: Jeff Garzik --- drivers/ata/libahci.c | 6 +++--- drivers/ata/libata-core.c | 22 +++++++++++++--------- include/linux/ata.h | 8 +++++--- include/linux/libata.h | 4 ++-- 4 files changed, 23 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 320712a7b9ea..6cd7805e47ca 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1951,13 +1951,13 @@ static void ahci_set_aggressive_devslp(struct ata_port *ap, bool sleep) /* Use the nominal value 10 ms if the read MDAT is zero, * the nominal value of DETO is 20 ms. */ - if (dev->sata_settings[ATA_LOG_DEVSLP_VALID] & + if (dev->devslp_timing[ATA_LOG_DEVSLP_VALID] & ATA_LOG_DEVSLP_VALID_MASK) { - mdat = dev->sata_settings[ATA_LOG_DEVSLP_MDAT] & + mdat = dev->devslp_timing[ATA_LOG_DEVSLP_MDAT] & ATA_LOG_DEVSLP_MDAT_MASK; if (!mdat) mdat = 10; - deto = dev->sata_settings[ATA_LOG_DEVSLP_DETO]; + deto = dev->devslp_timing[ATA_LOG_DEVSLP_DETO]; if (!deto) deto = 20; } else { diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 9e8b99af400d..46cd3f4c6aaa 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2325,24 +2325,28 @@ int ata_dev_configure(struct ata_device *dev) } } - /* check and mark DevSlp capability */ - if (ata_id_has_devslp(dev->id)) - dev->flags |= ATA_DFLAG_DEVSLP; - - /* Obtain SATA Settings page from Identify Device Data Log, - * which contains DevSlp timing variables etc. - * Exclude old devices with ata_id_has_ncq() + /* Check and mark DevSlp capability. Get DevSlp timing variables + * from SATA Settings page of Identify Device Data Log. */ - if (ata_id_has_ncq(dev->id)) { + if (ata_id_has_devslp(dev->id)) { + u8 sata_setting[ATA_SECT_SIZE]; + int i, j; + + dev->flags |= ATA_DFLAG_DEVSLP; err_mask = ata_read_log_page(dev, ATA_LOG_SATA_ID_DEV_DATA, ATA_LOG_SATA_SETTINGS, - dev->sata_settings, + sata_setting, 1); if (err_mask) ata_dev_dbg(dev, "failed to get Identify Device Data, Emask 0x%x\n", err_mask); + else + for (i = 0; i < ATA_LOG_DEVSLP_SIZE; i++) { + j = ATA_LOG_DEVSLP_OFFSET + i; + dev->devslp_timing[i] = sata_setting[j]; + } } dev->cdb_len = 16; diff --git a/include/linux/ata.h b/include/linux/ata.h index 408da9502177..8f7a3d68371a 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -297,10 +297,12 @@ enum { ATA_LOG_SATA_NCQ = 0x10, ATA_LOG_SATA_ID_DEV_DATA = 0x30, ATA_LOG_SATA_SETTINGS = 0x08, - ATA_LOG_DEVSLP_MDAT = 0x30, + ATA_LOG_DEVSLP_OFFSET = 0x30, + ATA_LOG_DEVSLP_SIZE = 0x08, + ATA_LOG_DEVSLP_MDAT = 0x00, ATA_LOG_DEVSLP_MDAT_MASK = 0x1F, - ATA_LOG_DEVSLP_DETO = 0x31, - ATA_LOG_DEVSLP_VALID = 0x37, + ATA_LOG_DEVSLP_DETO = 0x01, + ATA_LOG_DEVSLP_VALID = 0x07, ATA_LOG_DEVSLP_VALID_MASK = 0x80, /* READ/WRITE LONG (obsolete) */ diff --git a/include/linux/libata.h b/include/linux/libata.h index 83ba0ab2c915..649e5f86b5f0 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -652,8 +652,8 @@ struct ata_device { u32 gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */ }; - /* Identify Device Data Log (30h), SATA Settings (page 08h) */ - u8 sata_settings[ATA_SECT_SIZE]; + /* DEVSLP Timing Variables from Identify Device Data Log */ + u8 devslp_timing[ATA_LOG_DEVSLP_SIZE]; /* error history */ int spdn_cnt; -- cgit v1.2.3 From 8aef33a7cf40ca9da188e8578b2abe7267a38c52 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 15 Jan 2013 14:18:04 +0100 Subject: cpuidle: remove the power_specified field in the driver We realized that the power usage field is never filled and when it is filled for tegra, the power_specified flag is not set causing all of these values to be reset when the driver is initialized with set_power_state(). However, the power_specified flag can be simply removed under the assumption that the states are always backward sorted, which is the case with the current code. This change allows the menu governor select function and the cpuidle_play_dead() to be simplified. Moreover, the set_power_states() function can removed as it does not make sense any more. Drop the power_specified flag from struct cpuidle_driver and make the related changes as described above. As a consequence, this also fixes the bug where on the dynamic C-states system, the power fields are not initialized. [rjw: Changelog] References: https://bugzilla.kernel.org/show_bug.cgi?id=42870 References: https://bugzilla.kernel.org/show_bug.cgi?id=43349 References: https://lkml.org/lkml/2012/10/16/518 Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 17 ++++------------- drivers/cpuidle/driver.c | 25 ------------------------- drivers/cpuidle/governors/menu.c | 8 ++------ include/linux/cpuidle.h | 2 +- 4 files changed, 7 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index fb4a7dd57f94..e1f6860e069c 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -69,24 +69,15 @@ int cpuidle_play_dead(void) { struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); - int i, dead_state = -1; - int power_usage = INT_MAX; + int i; if (!drv) return -ENODEV; /* Find lowest-power state that supports long-term idle */ - for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) { - struct cpuidle_state *s = &drv->states[i]; - - if (s->power_usage < power_usage && s->enter_dead) { - power_usage = s->power_usage; - dead_state = i; - } - } - - if (dead_state != -1) - return drv->states[dead_state].enter_dead(dev, dead_state); + for (i = drv->state_count - 1; i >= CPUIDLE_DRIVER_STATE_START; i--) + if (drv->states[i].enter_dead) + return drv->states[i].enter_dead(dev, i); return -ENODEV; } diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index c2b281afe0ed..422c7b69ba7c 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -19,34 +19,9 @@ DEFINE_SPINLOCK(cpuidle_driver_lock); static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu); static struct cpuidle_driver * __cpuidle_get_cpu_driver(int cpu); -static void set_power_states(struct cpuidle_driver *drv) -{ - int i; - - /* - * cpuidle driver should set the drv->power_specified bit - * before registering if the driver provides - * power_usage numbers. - * - * If power_specified is not set, - * we fill in power_usage with decreasing values as the - * cpuidle code has an implicit assumption that state Cn - * uses less power than C(n-1). - * - * With CONFIG_ARCH_HAS_CPU_RELAX, C0 is already assigned - * an power value of -1. So we use -2, -3, etc, for other - * c-states. - */ - for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) - drv->states[i].power_usage = -1 - i; -} - static void __cpuidle_driver_init(struct cpuidle_driver *drv) { drv->refcnt = 0; - - if (!drv->power_specified) - set_power_states(drv); } static int __cpuidle_register_driver(struct cpuidle_driver *drv, int cpu) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 20ea33afdda1..fe343a06b7da 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -312,7 +312,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = &__get_cpu_var(menu_devices); int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); - int power_usage = INT_MAX; int i; int multiplier; struct timespec t; @@ -383,11 +382,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (s->exit_latency * multiplier > data->predicted_us) continue; - if (s->power_usage < power_usage) { - power_usage = s->power_usage; - data->last_state_idx = i; - data->exit_us = s->exit_latency; - } + data->last_state_idx = i; + data->exit_us = s->exit_latency; } /* not deepest C-state chosen for low predicted residency */ diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 3711b34dc4f9..24cd1037b6d6 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -126,9 +126,9 @@ struct cpuidle_driver { struct module *owner; int refcnt; - unsigned int power_specified:1; /* set to 1 to use the core cpuidle time keeping (for all states). */ unsigned int en_core_tk_irqen:1; + /* states array must be ordered in decreasing power consumption */ struct cpuidle_state states[CPUIDLE_STATE_MAX]; int state_count; int safe_state_index; -- cgit v1.2.3 From 774a1221e862b343388347bac9b318767336b20b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jan 2013 18:52:51 -0800 Subject: module, async: async_synchronize_full() on module init iff async is used If the default iosched is built as module, the kernel may deadlock while trying to load the iosched module on device probe if the probing was running off async. This is because async_synchronize_full() at the end of module init ends up waiting for the async job which initiated the module loading. async A modprobe 1. finds a device 2. registers the block device 3. request_module(default iosched) 4. modprobe in userland 5. load and init module 6. async_synchronize_full() Async A waits for modprobe to finish in request_module() and modprobe waits for async A to finish in async_synchronize_full(). Because there's no easy to track dependency once control goes out to userland, implementing properly nested flushing is difficult. For now, make module init perform async_synchronize_full() iff module init has queued async jobs as suggested by Linus. This avoids the described deadlock because iosched module doesn't use async and thus wouldn't invoke async_synchronize_full(). This is hacky and incomplete. It will deadlock if async module loading nests; however, this works around the known problem case and seems to be the best of bad options. For more details, please refer to the following thread. http://thread.gmane.org/gmane.linux.kernel/1420814 Signed-off-by: Tejun Heo Reported-by: Alex Riesen Tested-by: Ming Lei Tested-by: Alex Riesen Cc: Arjan van de Ven Cc: Jens Axboe Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + kernel/async.c | 3 +++ kernel/module.c | 27 +++++++++++++++++++++++++-- 3 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 206bb089c06b..6fc8f45de4e9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1810,6 +1810,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ +#define PF_USED_ASYNC 0x00004000 /* used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ diff --git a/kernel/async.c b/kernel/async.c index 9d3118384858..a1d585c351d6 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -196,6 +196,9 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a atomic_inc(&entry_count); spin_unlock_irqrestore(&async_lock, flags); + /* mark that this task has queued an async job, used by module init */ + current->flags |= PF_USED_ASYNC; + /* schedule for execution */ queue_work(system_unbound_wq, &entry->work); diff --git a/kernel/module.c b/kernel/module.c index 250092c1d57d..b10b048367e1 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3013,6 +3013,12 @@ static int do_init_module(struct module *mod) { int ret = 0; + /* + * We want to find out whether @mod uses async during init. Clear + * PF_USED_ASYNC. async_schedule*() will set it. + */ + current->flags &= ~PF_USED_ASYNC; + blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); @@ -3058,8 +3064,25 @@ static int do_init_module(struct module *mod) blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_LIVE, mod); - /* We need to finish all async code before the module init sequence is done */ - async_synchronize_full(); + /* + * We need to finish all async code before the module init sequence + * is done. This has potential to deadlock. For example, a newly + * detected block device can trigger request_module() of the + * default iosched from async probing task. Once userland helper + * reaches here, async_synchronize_full() will wait on the async + * task waiting on request_module() and deadlock. + * + * This deadlock is avoided by perfomring async_synchronize_full() + * iff module init queued any async jobs. This isn't a full + * solution as it will deadlock the same if module loading from + * async jobs nests more than once; however, due to the various + * constraints, this hack seems to be the best option for now. + * Please refer to the following thread for details. + * + * http://thread.gmane.org/gmane.linux.kernel/1420814 + */ + if (current->flags & PF_USED_ASYNC) + async_synchronize_full(); mutex_lock(&module_mutex); /* Drop initial reference. */ -- cgit v1.2.3 From e65b9ad222c280c031bc8d3642cc38dd3026fe06 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 15 Jan 2013 20:12:37 +0100 Subject: lockdep, rwsem: fix down_write_nest_lock() if !CONFIG_DEBUG_LOCK_ALLOC Commit 1b963c81b145 ("lockdep, rwsem: provide down_write_nest_lock()") contains a bug in a codepath when CONFIG_DEBUG_LOCK_ALLOC is disabled, which causes down_read() to be called instead of down_write() by mistake on such configurations. Fix that. Reported-and-tested-by: Andrew Clayton Reported-and-tested-by: Zlatko Calusic Signed-off-by: Jiri Kosina Reviewed-by: Rik van Riel Signed-off-by: Linus Torvalds --- include/linux/rwsem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 413cc11e414a..8da67d625e13 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -135,7 +135,7 @@ do { \ #else # define down_read_nested(sem, subclass) down_read(sem) -# define down_write_nest_lock(sem, nest_lock) down_read(sem) +# define down_write_nest_lock(sem, nest_lock) down_write(sem) # define down_write_nested(sem, subclass) down_write(sem) #endif -- cgit v1.2.3 From ebebd49a8eab5e9aa1b1f8f1614ccc3c2120f886 Mon Sep 17 00:00:00 2001 From: Stephen Hurd Date: Thu, 17 Jan 2013 14:14:53 -0800 Subject: 8250/16?50: Add support for Broadcom TruManage redirected serial port Add support for the UART device present in Broadcom TruManage capable NetXtreme chips (ie: 5761m 5762, and 5725). This implementation has a hidden transmit FIFO, so running in single-byte interrupt mode results in too many interrupts. The UART_CAP_HFIFO capability was added to track this. It continues to reload the THR as long as the THRE and TSRE bits are set in the LSR up to a specified limit (1024 is used here). Signed-off-by: Stephen Hurd Signed-off-by: Michael Chan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.c | 11 +++++++++++ drivers/tty/serial/8250/8250.h | 1 + drivers/tty/serial/8250/8250_pci.c | 38 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/serial_core.h | 3 ++- 4 files changed, 52 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250.c index d085e3a8ec06..f9320437a649 100644 --- a/drivers/tty/serial/8250/8250.c +++ b/drivers/tty/serial/8250/8250.c @@ -300,6 +300,12 @@ static const struct serial8250_config uart_config[] = { UART_FCR_R_TRIG_00 | UART_FCR_T_TRIG_00, .flags = UART_CAP_FIFO, }, + [PORT_BRCM_TRUMANAGE] = { + .name = "TruManage", + .fifo_size = 1, + .tx_loadsz = 1024, + .flags = UART_CAP_HFIFO, + }, [PORT_8250_CIR] = { .name = "CIR port" } @@ -1490,6 +1496,11 @@ void serial8250_tx_chars(struct uart_8250_port *up) port->icount.tx++; if (uart_circ_empty(xmit)) break; + if (up->capabilities & UART_CAP_HFIFO) { + if ((serial_port_in(port, UART_LSR) & BOTH_EMPTY) != + BOTH_EMPTY) + break; + } } while (--count > 0); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 3b4ea84898c2..12caa1292b75 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -40,6 +40,7 @@ struct serial8250_config { #define UART_CAP_AFE (1 << 11) /* MCR-based hw flow control */ #define UART_CAP_UUE (1 << 12) /* UART needs IER bit 6 set (Xscale) */ #define UART_CAP_RTOIE (1 << 13) /* UART needs IER bit 4 set (Xscale, Tegra) */ +#define UART_CAP_HFIFO (1 << 14) /* UART has a "hidden" FIFO */ #define UART_BUG_QUOT (1 << 0) /* UART has buggy quot LSB */ #define UART_BUG_TXEN (1 << 1) /* UART has buggy TX IIR status */ diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 8a2c3d934187..a27a98e1b066 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1085,6 +1085,18 @@ pci_omegapci_setup(struct serial_private *priv, return setup_port(priv, port, 2, idx * 8, 0); } +static int +pci_brcm_trumanage_setup(struct serial_private *priv, + const struct pciserial_board *board, + struct uart_8250_port *port, int idx) +{ + int ret = pci_default_setup(priv, board, port, idx); + + port->port.type = PORT_BRCM_TRUMANAGE; + port->port.flags = (port->port.flags | UPF_FIXED_PORT | UPF_FIXED_TYPE); + return ret; +} + static int skip_tx_en_setup(struct serial_private *priv, const struct pciserial_board *board, struct uart_8250_port *port, int idx) @@ -1304,6 +1316,7 @@ pci_wch_ch353_setup(struct serial_private *priv, #define PCI_DEVICE_ID_COMMTECH_4224PCIE 0x0020 #define PCI_DEVICE_ID_COMMTECH_4228PCIE 0x0021 #define PCI_DEVICE_ID_COMMTECH_4222PCIE 0x0022 +#define PCI_DEVICE_ID_BROADCOM_TRUMANAGE 0x160a /* Unknown vendors/cards - this should not be in linux/pci_ids.h */ @@ -1953,6 +1966,17 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .subdevice = PCI_ANY_ID, .setup = pci_xr17v35x_setup, }, + /* + * Broadcom TruManage (NetXtreme) + */ + { + .vendor = PCI_VENDOR_ID_BROADCOM, + .device = PCI_DEVICE_ID_BROADCOM_TRUMANAGE, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_brcm_trumanage_setup, + }, + /* * Default "match everything" terminator entry */ @@ -2148,6 +2172,7 @@ enum pci_board_num_t { pbn_ce4100_1_115200, pbn_omegapci, pbn_NETMOS9900_2s_115200, + pbn_brcm_trumanage, }; /* @@ -2892,6 +2917,12 @@ static struct pciserial_board pci_boards[] = { .num_ports = 2, .base_baud = 115200, }, + [pbn_brcm_trumanage] = { + .flags = FL_BASE0, + .num_ports = 1, + .reg_shift = 2, + .base_baud = 115200, + }, }; static const struct pci_device_id blacklist[] = { @@ -4470,6 +4501,13 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_omegapci }, + /* + * Broadcom TruManage + */ + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_BROADCOM_TRUMANAGE, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_brcm_trumanage }, + /* * AgeStar as-prs2-009 */ diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index 78f99d97475b..2c6c85f18ea0 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -50,7 +50,8 @@ #define PORT_LPC3220 22 /* NXP LPC32xx SoC "Standard" UART */ #define PORT_8250_CIR 23 /* CIR infrared port, has its own driver */ #define PORT_XR17V35X 24 /* Exar XR17V35x UARTs */ -#define PORT_MAX_8250 24 /* max port ID */ +#define PORT_BRCM_TRUMANAGE 24 +#define PORT_MAX_8250 25 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed -- cgit v1.2.3 From 2f91ec8cc456d6e57db3ebced34a9e96a356168f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 26 Dec 2012 03:19:55 +0300 Subject: asm-generic, mm: pgtable: convert my_zero_pfn() to macros to fix build Commit 816422ad7647 ("asm-generic, mm: pgtable: consolidate zero page helpers") broke the compile on MIPS if SPARSEMEM is enabled. We get this: In file included from arch/mips/include/asm/pgtable.h:552, from include/linux/mm.h:44, from arch/mips/kernel/asm-offsets.c:14: include/asm-generic/pgtable.h: In function 'my_zero_pfn': include/asm-generic/pgtable.h:466: error: implicit declaration of function 'page_to_section' In file included from arch/mips/kernel/asm-offsets.c:14: include/linux/mm.h: At top level: include/linux/mm.h:738: error: conflicting types for 'page_to_section' include/asm-generic/pgtable.h:466: note: previous implicit declaration of 'page_to_section' was here Due header files inter-dependencies, the only way I see to fix it is convert my_zero_pfn() for __HAVE_COLOR_ZERO_PAGE to macros. Signed-off-by: Kirill A. Shutemov Tested-by: Aaro Koskinen Acked-by: David Daney Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 701beab27aab..5cf680a98f9b 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -461,10 +461,8 @@ static inline int is_zero_pfn(unsigned long pfn) return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT); } -static inline unsigned long my_zero_pfn(unsigned long addr) -{ - return page_to_pfn(ZERO_PAGE(addr)); -} +#define my_zero_pfn(addr) page_to_pfn(ZERO_PAGE(addr)) + #else static inline int is_zero_pfn(unsigned long pfn) { -- cgit v1.2.3 From edea0d03ee5f0ae0051b6adb6681ebdf976b1ca4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 20 Jan 2013 20:25:47 +0100 Subject: ia64: kill thread_matches(), unexport ptrace_check_attach() The ia64 function "thread_matches()" has no users since commit e868a55c2a8c ("[IA64] remove find_thread_for_addr()"). Remove it. This allows us to make ptrace_check_attach() static to kernel/ptrace.c, which is good since we'll need to change the semantics of it and fix up all the callers. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- arch/ia64/kernel/ptrace.c | 27 --------------------------- include/linux/ptrace.h | 1 - kernel/ptrace.c | 2 +- 3 files changed, 1 insertion(+), 29 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 4265ff64219b..b7a5fffe0924 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -672,33 +672,6 @@ ptrace_attach_sync_user_rbs (struct task_struct *child) read_unlock(&tasklist_lock); } -static inline int -thread_matches (struct task_struct *thread, unsigned long addr) -{ - unsigned long thread_rbs_end; - struct pt_regs *thread_regs; - - if (ptrace_check_attach(thread, 0) < 0) - /* - * If the thread is not in an attachable state, we'll - * ignore it. The net effect is that if ADDR happens - * to overlap with the portion of the thread's - * register backing store that is currently residing - * on the thread's kernel stack, then ptrace() may end - * up accessing a stale value. But if the thread - * isn't stopped, that's a problem anyhow, so we're - * doing as well as we can... - */ - return 0; - - thread_regs = task_pt_regs(thread); - thread_rbs_end = ia64_get_user_rbs_end(thread, thread_regs, NULL); - if (!on_kernel_rbs(addr, thread_regs->ar_bspstore, thread_rbs_end)) - return 0; - - return 1; /* looks like we've got a winner */ -} - /* * Write f32-f127 back to task->thread.fph if it has been modified. */ diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 1693775ecfe8..89573a33ab3c 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -45,7 +45,6 @@ extern long arch_ptrace(struct task_struct *child, long request, extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); extern void ptrace_disable(struct task_struct *); -extern int ptrace_check_attach(struct task_struct *task, bool ignore_state); extern int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data); extern void ptrace_notify(int exit_code); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1599157336a6..612a56126851 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -139,7 +139,7 @@ void __ptrace_unlink(struct task_struct *child) * RETURNS: * 0 on success, -ESRCH if %child is not ready. */ -int ptrace_check_attach(struct task_struct *child, bool ignore_state) +static int ptrace_check_attach(struct task_struct *child, bool ignore_state) { int ret = -ESRCH; -- cgit v1.2.3 From 910ffdb18a6408e14febbb6e4b6840fd2c928c82 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Jan 2013 20:47:41 +0100 Subject: ptrace: introduce signal_wake_up_state() and ptrace_signal_wake_up() Cleanup and preparation for the next change. signal_wake_up(resume => true) is overused. None of ptrace/jctl callers actually want to wakeup a TASK_WAKEKILL task, but they can't specify the necessary mask. Turn signal_wake_up() into signal_wake_up_state(state), reintroduce signal_wake_up() as a trivial helper, and add ptrace_signal_wake_up() which adds __TASK_TRACED. This way ptrace_signal_wake_up() can work "inside" ptrace_request() even if the tracee doesn't have the TASK_WAKEKILL bit set. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- include/linux/sched.h | 11 ++++++++++- kernel/ptrace.c | 8 ++++---- kernel/signal.c | 14 ++++---------- 3 files changed, 18 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6fc8f45de4e9..d2112477ff5e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2714,7 +2714,16 @@ static inline void thread_group_cputime_init(struct signal_struct *sig) extern void recalc_sigpending_and_wake(struct task_struct *t); extern void recalc_sigpending(void); -extern void signal_wake_up(struct task_struct *t, int resume_stopped); +extern void signal_wake_up_state(struct task_struct *t, unsigned int state); + +static inline void signal_wake_up(struct task_struct *t, bool resume) +{ + signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); +} +static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) +{ + signal_wake_up_state(t, resume ? __TASK_TRACED : 0); +} /* * Wrappers for p->thread_info->cpu access. No-op on UP. diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 612a56126851..62f7c2774b16 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -117,7 +117,7 @@ void __ptrace_unlink(struct task_struct *child) * TASK_KILLABLE sleeps. */ if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child)) - signal_wake_up(child, task_is_traced(child)); + ptrace_signal_wake_up(child, true); spin_unlock(&child->sighand->siglock); } @@ -317,7 +317,7 @@ static int ptrace_attach(struct task_struct *task, long request, */ if (task_is_stopped(task) && task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) - signal_wake_up(task, 1); + signal_wake_up_state(task, __TASK_STOPPED); spin_unlock(&task->sighand->siglock); @@ -737,7 +737,7 @@ int ptrace_request(struct task_struct *child, long request, * tracee into STOP. */ if (likely(task_set_jobctl_pending(child, JOBCTL_TRAP_STOP))) - signal_wake_up(child, child->jobctl & JOBCTL_LISTENING); + ptrace_signal_wake_up(child, child->jobctl & JOBCTL_LISTENING); unlock_task_sighand(child, &flags); ret = 0; @@ -763,7 +763,7 @@ int ptrace_request(struct task_struct *child, long request, * start of this trap and now. Trigger re-trap. */ if (child->jobctl & JOBCTL_TRAP_NOTIFY) - signal_wake_up(child, true); + ptrace_signal_wake_up(child, true); ret = 0; } unlock_task_sighand(child, &flags); diff --git a/kernel/signal.c b/kernel/signal.c index 53cd5c4d1172..6e97aa6fa32c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -680,23 +680,17 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) * No need to set need_resched since signal event passing * goes through ->blocked */ -void signal_wake_up(struct task_struct *t, int resume) +void signal_wake_up_state(struct task_struct *t, unsigned int state) { - unsigned int mask; - set_tsk_thread_flag(t, TIF_SIGPENDING); - /* - * For SIGKILL, we want to wake it up in the stopped/traced/killable + * TASK_WAKEKILL also means wake it up in the stopped/traced/killable * case. We don't check t->state here because there is a race with it * executing another processor and just now entering stopped state. * By using wake_up_state, we ensure the process will wake up and * handle its death signal. */ - mask = TASK_INTERRUPTIBLE; - if (resume) - mask |= TASK_WAKEKILL; - if (!wake_up_state(t, mask)) + if (!wake_up_state(t, state | TASK_INTERRUPTIBLE)) kick_process(t); } @@ -844,7 +838,7 @@ static void ptrace_trap_notify(struct task_struct *t) assert_spin_locked(&t->sighand->siglock); task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY); - signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); + ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); } /* -- cgit v1.2.3