From 7f78e0351394052e1a6293e175825eb5c7869507 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 2 Mar 2013 19:39:14 -0800 Subject: fs: Limit sys_mount to only request filesystem modules. Modify the request_module to prefix the file system type with "fs-" and add aliases to all of the filesystems that can be built as modules to match. A common practice is to build all of the kernel code and leave code that is not commonly needed as modules, with the result that many users are exposed to any bug anywhere in the kernel. Looking for filesystems with a fs- prefix limits the pool of possible modules that can be loaded by mount to just filesystems, trivially making things safer with no real cost. Using aliases means user space can control the policy of which filesystem modules are auto-loaded by editing /etc/modprobe.d/*.conf with blacklist and alias directives, allowing simple, safe, well-understood work-arounds to known problematic software. This also addresses a rare but unfortunate problem where the filesystem name is not the same as its module name and module auto-loading would not work. While writing this patch I saw a handful of such cases. The most significant is autofs, which lives in the module autofs4. This is relevant to user namespaces because we can reach the request module in get_fs_type() without having any special permissions, and people get uncomfortable when a user-specified string (in this case the filesystem type) goes all of the way to request_module. After having looked at this issue I don't think there is any particular reason to perform any filtering or permission checks beyond making it clear in the module request that we want a filesystem module. The common pattern in the kernel is to call request_module() without regard to the user's permissions. In general all a filesystem module does once loaded is call register_filesystem() and go to sleep. 
This means there is not much attack surface exposed by loading a filesystem module unless the filesystem is mounted. In a user namespace filesystems are not mounted unless .fs_flags = FS_USERNS_MOUNT, which most filesystems do not set today. Acked-by: Serge Hallyn Acked-by: Kees Cook Reported-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- arch/powerpc/platforms/cell/spufs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 863184b182f4..3f3bb4cdbbec 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -749,6 +749,7 @@ static struct file_system_type spufs_type = { .mount = spufs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("spufs"); static int __init spufs_init(void) { -- cgit v1.2.3 From 6b6680c4ea3952af8ae76915cbca41245147741b Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 25 Feb 2013 16:51:49 +0000 Subject: powerpc/pseries/hvcserver: Fix strncpy buffer limit in location code The dest buf len is 80 (HVCS_CLC_LENGTH + 1). The src buf len is PAGE_SIZE. If the src buf string len is more than 80, the copy will overflow the dest buf. 
Signed-off-by: Chen Gang Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/hvcserver.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c index fcf4b4cbeaf3..4557e91626c4 100644 --- a/arch/powerpc/platforms/pseries/hvcserver.c +++ b/arch/powerpc/platforms/pseries/hvcserver.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -188,9 +189,9 @@ int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head, = (unsigned int)last_p_partition_ID; /* copy the Null-term char too */ - strncpy(&next_partner_info->location_code[0], + strlcpy(&next_partner_info->location_code[0], (char *)&pi_buff[2], - strlen((char *)&pi_buff[2]) + 1); + sizeof(next_partner_info->location_code)); list_add_tail(&(next_partner_info->node), head); next_partner_info = NULL; -- cgit v1.2.3 From ff2d7587c7b2a1b46abc7618f45b8cc3476d8716 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Mon, 11 Mar 2013 13:44:55 +0000 Subject: powerpc: Remove last traces of POWER4_ONLY The Kconfig symbol POWER4_ONLY got removed in commit 694caf0255dcab506d1e174c96a65ab65d96e108 ("powerpc: Remove CONFIG_POWER4_ONLY"). Remove its last traces. 
Signed-off-by: Paul Bolle Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/Kconfig.cputype | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index cea2f09c4241..18e3b76c78d7 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -124,9 +124,8 @@ config 6xx select PPC_HAVE_PMU_SUPPORT config POWER3 - bool depends on PPC64 && PPC_BOOK3S - default y if !POWER4_ONLY + def_bool y config POWER4 depends on PPC64 && PPC_BOOK3S @@ -145,8 +144,7 @@ config TUNE_CELL but somewhat slower on other machines. This option only changes the scheduling of instructions, not the selection of instructions itself, so the resulting kernel will keep running on all other - machines. When building a kernel that is supposed to run only - on Cell, you should also select the POWER4_ONLY option. + machines. # this is temp to handle compat with arch=ppc config 8xx -- cgit v1.2.3 From 9997d08806062cb7ba471ab12fa2742cfec2f413 Mon Sep 17 00:00:00 2001 From: Ben Collins Date: Mon, 18 Mar 2013 19:19:07 -0400 Subject: sgy-cts1000: Remove __dev* attributes Somehow the driver snuck in with these still in it. 
Signed-off-by: Ben Collins Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/85xx/sgy_cts1000.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index 611e92f291c4..7179726ba5c5 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -69,7 +69,7 @@ static irqreturn_t gpio_halt_irq(int irq, void *__data) return IRQ_HANDLED; }; -static int __devinit gpio_halt_probe(struct platform_device *pdev) +static int gpio_halt_probe(struct platform_device *pdev) { enum of_gpio_flags flags; struct device_node *node = pdev->dev.of_node; @@ -128,7 +128,7 @@ static int __devinit gpio_halt_probe(struct platform_device *pdev) return 0; } -static int __devexit gpio_halt_remove(struct platform_device *pdev) +static int gpio_halt_remove(struct platform_device *pdev) { if (halt_node) { int gpio = of_get_gpio(halt_node, 0); @@ -165,7 +165,7 @@ static struct platform_driver gpio_halt_driver = { .of_match_table = gpio_halt_match, }, .probe = gpio_halt_probe, - .remove = __devexit_p(gpio_halt_remove), + .remove = gpio_halt_remove, }; module_platform_driver(gpio_halt_driver); -- cgit v1.2.3 From 9fb2640159f9d4f5a2a9d60e490482d4cbecafdb Mon Sep 17 00:00:00 2001 From: Michael Wolf Date: Fri, 5 Apr 2013 10:41:40 +0000 Subject: powerpc: pSeries_lpar_hpte_remove fails from Adjunct partition being performed before the ANDCOND test Some versions of pHyp will perform the adjunct partition test before the ANDCOND test. The result of this is that H_RESOURCE can be returned and cause the BUG_ON condition to occur. The HPTE is not removed. So add a check for H_RESOURCE, it is ok if this HPTE is not removed as pSeries_lpar_hpte_remove is looking for an HPTE to remove and not a specific HPTE to remove. So it is ok to just move on to the next slot and try again. 
Cc: stable@vger.kernel.org Signed-off-by: Michael Wolf Signed-off-by: Stephen Rothwell --- arch/powerpc/platforms/pseries/lpar.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 0da39fed355a..299731e9036b 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -186,7 +186,13 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group) (0x1UL << 4), &dummy1, &dummy2); if (lpar_rc == H_SUCCESS) return i; - BUG_ON(lpar_rc != H_NOT_FOUND); + + /* + * The test for adjunct partition is performed before the + * ANDCOND test. H_RESOURCE may be returned, so we need to + * check for that as well. + */ + BUG_ON(lpar_rc != H_NOT_FOUND && lpar_rc != H_RESOURCE); slot_offset++; slot_offset &= 0x7; -- cgit v1.2.3 From 9296d94d83649e1c2f25c87dc4ead9c2ab073305 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 9 Apr 2013 14:29:26 +0200 Subject: USB: remove USB_EHCI_BIG_ENDIAN_{DESC,MMIO} depends on architecture symbol Just like the OHCI counter part we just can remove the architecture specific symbols which prevent these configuration symbols from being selected by platforms/architectures requiring it. The original implementation did not scale at all since it required each and every single architecture to be added for these configuration symbols to be selected. Now it is up to the EHCI driver and/or platform to select these configuration symbols accordingly. 
Acked-by: Alan Stern Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- arch/arm/Kconfig | 2 ++ arch/mips/Kconfig | 3 +++ arch/powerpc/platforms/44x/Kconfig | 2 ++ arch/powerpc/platforms/512x/Kconfig | 2 ++ arch/sparc/Kconfig | 2 ++ drivers/usb/host/Kconfig | 11 ++--------- 6 files changed, 13 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 1cacda426a0e..bbddefea77bb 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -549,6 +549,8 @@ config ARCH_IXP4XX select GENERIC_CLOCKEVENTS select MIGHT_HAVE_PCI select NEED_MACH_IO_H + select USB_EHCI_BIG_ENDIAN_MMIO + select USB_EHCI_BIG_ENDIAN_DESC help Support for Intel's IXP4XX (XScale) family of processors. diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 51244bf97271..3a7b3954ce1b 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -404,6 +404,8 @@ config PMC_MSP select IRQ_CPU select SERIAL_8250 select SERIAL_8250_CONSOLE + select USB_EHCI_BIG_ENDIAN_MMIO + select USB_EHCI_BIG_ENDIAN_DESC help This adds support for the PMC-Sierra family of Multi-Service Processor System-On-A-Chips. These parts include a number @@ -1433,6 +1435,7 @@ config CPU_CAVIUM_OCTEON select CPU_SUPPORTS_HUGEPAGES select LIBFDT select USE_OF + select USB_EHCI_BIG_ENDIAN_MMIO help The Cavium Octeon processor is a highly integrated chip containing many ethernet hardware widgets for networking tasks. 
The processor diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 0effe9f5a1ea..7be93367d92f 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -274,6 +274,8 @@ config 440EPX select IBM_EMAC_EMAC4 select IBM_EMAC_RGMII select IBM_EMAC_ZMII + select USB_EHCI_BIG_ENDIAN_MMIO + select USB_EHCI_BIG_ENDIAN_DESC config 440GRX bool diff --git a/arch/powerpc/platforms/512x/Kconfig b/arch/powerpc/platforms/512x/Kconfig index c16999802ecf..381a592826a2 100644 --- a/arch/powerpc/platforms/512x/Kconfig +++ b/arch/powerpc/platforms/512x/Kconfig @@ -7,6 +7,8 @@ config PPC_MPC512x select PPC_PCI_CHOICE select FSL_PCI if PCI select ARCH_WANT_OPTIONAL_GPIOLIB + select USB_EHCI_BIG_ENDIAN_MMIO + select USB_EHCI_BIG_ENDIAN_DESC config MPC5121_ADS bool "Freescale MPC5121E ADS" diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 3d361f236308..66dc562950ae 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -407,6 +407,8 @@ config SERIAL_CONSOLE config SPARC_LEON bool "Sparc Leon processor family" depends on SPARC32 + select USB_EHCI_BIG_ENDIAN_MMIO + select USB_EHCI_BIG_ENDIAN_DESC ---help--- If you say Y here if you are running on a SPARC-LEON processor. 
The LEON processor is a synthesizable VHDL model of the diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index 1714c6defd23..436b6828afdd 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -110,18 +110,11 @@ config USB_EHCI_HCD_PMC_MSP config USB_EHCI_BIG_ENDIAN_MMIO bool - depends on USB_EHCI_HCD && (PPC_CELLEB || PPC_PS3 || 440EPX || \ - ARCH_IXP4XX || XPS_USB_HCD_XILINX || \ - PPC_MPC512x || CPU_CAVIUM_OCTEON || \ - PMC_MSP || SPARC_LEON || MIPS_SEAD3) - default y + depends on USB_EHCI_HCD config USB_EHCI_BIG_ENDIAN_DESC bool - depends on USB_EHCI_HCD && (440EPX || ARCH_IXP4XX || XPS_USB_HCD_XILINX || \ - PPC_MPC512x || PMC_MSP || SPARC_LEON || \ - MIPS_SEAD3) - default y + depends on USB_EHCI_HCD config XPS_USB_HCD_XILINX bool "Use Xilinx usb host EHCI controller core" -- cgit v1.2.3 From 5d585e5c4840381112011aa257c7a94b2b66f9de Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 29 Apr 2013 15:06:47 -0700 Subject: mm/ppc: use common help functions to free reserved pages Use common help functions to free reserved pages. 
Signed-off-by: Jiang Liu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Anatolij Gustschin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/crash_dump.c | 5 +---- arch/powerpc/kernel/fadump.c | 5 +---- arch/powerpc/kernel/kvm.c | 7 +------ arch/powerpc/mm/mem.c | 29 ++-------------------------- arch/powerpc/platforms/512x/mpc512x_shared.c | 5 +---- 5 files changed, 6 insertions(+), 45 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index b3ba5163eae2..9ec3fe174cba 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -150,10 +150,7 @@ void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) if (addr <= rtas_end && ((addr + PAGE_SIZE) > rtas_start)) continue; - ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT)); - init_page_count(pfn_to_page(addr >> PAGE_SHIFT)); - free_page((unsigned long)__va(addr)); - totalram_pages++; + free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); } } #endif diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 06c8202a69cf..2230fd0ca3e4 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1045,10 +1045,7 @@ static void fadump_release_memory(unsigned long begin, unsigned long end) if (addr <= ra_end && ((addr + PAGE_SIZE) > ra_start)) continue; - ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT)); - init_page_count(pfn_to_page(addr >> PAGE_SHIFT)); - free_page((unsigned long)__va(addr)); - totalram_pages++; + free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); } } diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index a61b133c4f99..6782221d49bd 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -756,12 +756,7 @@ static __init void kvm_free_tmp(void) end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK; /* Free the tmp space we don't need */ - for (; start < end; start += 
PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages++; - } + free_reserved_area(start, end, 0, NULL); } static int __init kvm_guest_init(void) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index f1f7409a4183..c756713aaaba 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -405,39 +405,14 @@ void __init mem_init(void) void free_initmem(void) { - unsigned long addr; - ppc_md.progress = ppc_printk_progress; - - addr = (unsigned long)__init_begin; - for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { - memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages++; - } - pr_info("Freeing unused kernel memory: %luk freed\n", - ((unsigned long)__init_end - - (unsigned long)__init_begin) >> 10); + free_initmem_default(POISON_FREE_INITMEM); } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - if (start >= end) - return; - - start = _ALIGN_DOWN(start, PAGE_SIZE); - end = _ALIGN_UP(end, PAGE_SIZE); - pr_info("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages++; - } + free_reserved_area(start, end, 0, "initrd"); } #endif diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index d30235b7e3f7..db6ac389ef8c 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -172,12 +172,9 @@ static struct fsl_diu_shared_fb __attribute__ ((__aligned__(8))) diu_shared_fb; static inline void mpc512x_free_bootmem(struct page *page) { - __ClearPageReserved(page); BUG_ON(PageTail(page)); BUG_ON(atomic_read(&page->_count) > 1); - 
atomic_set(&page->_count, 1); - __free_page(page); - totalram_pages++; + free_reserved_page(page); } void mpc512x_release_bootmem(void) -- cgit v1.2.3 From 4edd7ceff0662afde195da6f6c43e7cbe1ed2dc4 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 29 Apr 2013 15:08:22 -0700 Subject: mm, hotplug: avoid compiling memory hotremove functions when disabled __remove_pages() is only necessary for CONFIG_MEMORY_HOTREMOVE. PowerPC pseries will return -EOPNOTSUPP if unsupported. Adding an #ifdef causes several other functions it depends on to also become unnecessary, which saves in .text when disabled (it's disabled in most defconfigs besides powerpc, including x86). remove_memory_block() becomes static since it is not referenced outside of drivers/base/memory.c. Build tested on x86 and powerpc with CONFIG_MEMORY_HOTREMOVE both enabled and disabled. Signed-off-by: David Rientjes Acked-by: Toshi Kani Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Greg Kroah-Hartman Cc: Wen Congyang Cc: Tang Chen Cc: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/pseries/hotplug-memory.c | 12 +++++ drivers/base/memory.c | 44 +++++++-------- include/linux/memory.h | 3 +- include/linux/memory_hotplug.h | 4 +- mm/memory_hotplug.c | 68 +++++++++++------------ mm/sparse.c | 72 +++++++++++++------------ 6 files changed, 113 insertions(+), 90 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 2372c609fa2b..9a432de363b8 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -72,6 +72,7 @@ unsigned long memory_block_size_bytes(void) return get_memblock_size(); } +#ifdef CONFIG_MEMORY_HOTREMOVE static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { unsigned long start, start_pfn; @@ -153,6 +154,17 @@ static int 
pseries_remove_memory(struct device_node *np) ret = pseries_remove_memblock(base, lmb_size); return ret; } +#else +static inline int pseries_remove_memblock(unsigned long base, + unsigned int memblock_size) +{ + return -EOPNOTSUPP; +} +static inline int pseries_remove_memory(struct device_node *np) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_MEMORY_HOTREMOVE */ static int pseries_add_memory(struct device_node *np) { diff --git a/drivers/base/memory.c b/drivers/base/memory.c index a51007b79032..65d9799cbb61 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -93,16 +93,6 @@ int register_memory(struct memory_block *memory) return error; } -static void -unregister_memory(struct memory_block *memory) -{ - BUG_ON(memory->dev.bus != &memory_subsys); - - /* drop the ref. we got in remove_memory_block() */ - kobject_put(&memory->dev.kobj); - device_unregister(&memory->dev); -} - unsigned long __weak memory_block_size_bytes(void) { return MIN_MEMORY_BLOCK_SIZE; @@ -637,8 +627,28 @@ static int add_memory_section(int nid, struct mem_section *section, return ret; } -int remove_memory_block(unsigned long node_id, struct mem_section *section, - int phys_device) +/* + * need an interface for the VM to add new memory regions, + * but without onlining it. + */ +int register_new_memory(int nid, struct mem_section *section) +{ + return add_memory_section(nid, section, NULL, MEM_OFFLINE, HOTPLUG); +} + +#ifdef CONFIG_MEMORY_HOTREMOVE +static void +unregister_memory(struct memory_block *memory) +{ + BUG_ON(memory->dev.bus != &memory_subsys); + + /* drop the ref. 
we got in remove_memory_block() */ + kobject_put(&memory->dev.kobj); + device_unregister(&memory->dev); +} + +static int remove_memory_block(unsigned long node_id, + struct mem_section *section, int phys_device) { struct memory_block *mem; @@ -661,15 +671,6 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section, return 0; } -/* - * need an interface for the VM to add new memory regions, - * but without onlining it. - */ -int register_new_memory(int nid, struct mem_section *section) -{ - return add_memory_section(nid, section, NULL, MEM_OFFLINE, HOTPLUG); -} - int unregister_memory_section(struct mem_section *section) { if (!present_section(section)) @@ -677,6 +678,7 @@ int unregister_memory_section(struct mem_section *section) return remove_memory_block(0, section, 0); } +#endif /* CONFIG_MEMORY_HOTREMOVE */ /* * offline one memory block. If the memory block has been offlined, do nothing. diff --git a/include/linux/memory.h b/include/linux/memory.h index 0ff6598ee62f..73817af8b480 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -115,9 +115,10 @@ extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb); extern int register_new_memory(int, struct mem_section *); +#ifdef CONFIG_MEMORY_HOTREMOVE extern int unregister_memory_section(struct mem_section *); +#endif extern int memory_dev_init(void); -extern int remove_memory_block(unsigned long, struct mem_section *, int); extern int memory_notify(unsigned long val, void *v); extern int memory_isolate_notify(unsigned long val, void *v); extern struct memory_block *find_memory_block_hinted(struct mem_section *, diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index b6a3be7d47bf..3e622c610925 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -97,13 +97,13 @@ extern 
void __online_page_free(struct page *page); #ifdef CONFIG_MEMORY_HOTREMOVE extern bool is_pageblock_removable_nolock(struct page *page); extern int arch_remove_memory(u64 start, u64 size); +extern int __remove_pages(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages); #endif /* CONFIG_MEMORY_HOTREMOVE */ /* reasonably generic interface to expand the physical pages in a zone */ extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); -extern int __remove_pages(struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages); #ifdef CONFIG_NUMA extern int memory_add_physaddr_to_nid(u64 start); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c916582591eb..60f6daad1076 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -436,6 +436,40 @@ static int __meminit __add_section(int nid, struct zone *zone, return register_new_memory(nid, __pfn_to_section(phys_start_pfn)); } +/* + * Reasonably generic function for adding memory. It is + * expected that archs that support memory hotplug will + * call this function after deciding the zone to which to + * add the new pages. + */ +int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, + unsigned long nr_pages) +{ + unsigned long i; + int err = 0; + int start_sec, end_sec; + /* during initialize mem_map, align hot-added range to section */ + start_sec = pfn_to_section_nr(phys_start_pfn); + end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); + + for (i = start_sec; i <= end_sec; i++) { + err = __add_section(nid, zone, i << PFN_SECTION_SHIFT); + + /* + * EEXIST is finally dealt with by ioresource collision + * check. see add_memory() => register_memory_resource() + * Warning will be printed if there is collision. 
+ */ + if (err && (err != -EEXIST)) + break; + err = 0; + } + + return err; +} +EXPORT_SYMBOL_GPL(__add_pages); + +#ifdef CONFIG_MEMORY_HOTREMOVE /* find the smallest valid pfn in the range [start_pfn, end_pfn) */ static int find_smallest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, @@ -658,39 +692,6 @@ static int __remove_section(struct zone *zone, struct mem_section *ms) return 0; } -/* - * Reasonably generic function for adding memory. It is - * expected that archs that support memory hotplug will - * call this function after deciding the zone to which to - * add the new pages. - */ -int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, - unsigned long nr_pages) -{ - unsigned long i; - int err = 0; - int start_sec, end_sec; - /* during initialize mem_map, align hot-added range to section */ - start_sec = pfn_to_section_nr(phys_start_pfn); - end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); - - for (i = start_sec; i <= end_sec; i++) { - err = __add_section(nid, zone, i << PFN_SECTION_SHIFT); - - /* - * EEXIST is finally dealt with by ioresource collision - * check. see add_memory() => register_memory_resource() - * Warning will be printed if there is collision. 
- */ - if (err && (err != -EEXIST)) - break; - err = 0; - } - - return err; -} -EXPORT_SYMBOL_GPL(__add_pages); - /** * __remove_pages() - remove sections of pages from a zone * @zone: zone from which pages need to be removed @@ -733,6 +734,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, return ret; } EXPORT_SYMBOL_GPL(__remove_pages); +#endif /* CONFIG_MEMORY_HOTREMOVE */ int set_online_page_callback(online_page_callback_t callback) { diff --git a/mm/sparse.c b/mm/sparse.c index a37be5f9050d..1c91f0d3f6ab 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -620,6 +620,7 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) vmemmap_free(start, end); } +#ifdef CONFIG_MEMORY_HOTREMOVE static void free_map_bootmem(struct page *memmap, unsigned long nr_pages) { unsigned long start = (unsigned long)memmap; @@ -627,6 +628,7 @@ static void free_map_bootmem(struct page *memmap, unsigned long nr_pages) vmemmap_free(start, end); } +#endif /* CONFIG_MEMORY_HOTREMOVE */ #else static struct page *__kmalloc_section_memmap(unsigned long nr_pages) { @@ -664,6 +666,7 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) get_order(sizeof(struct page) * nr_pages)); } +#ifdef CONFIG_MEMORY_HOTREMOVE static void free_map_bootmem(struct page *memmap, unsigned long nr_pages) { unsigned long maps_section_nr, removing_section_nr, i; @@ -690,40 +693,9 @@ static void free_map_bootmem(struct page *memmap, unsigned long nr_pages) put_page_bootmem(page); } } +#endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_SPARSEMEM_VMEMMAP */ -static void free_section_usemap(struct page *memmap, unsigned long *usemap) -{ - struct page *usemap_page; - unsigned long nr_pages; - - if (!usemap) - return; - - usemap_page = virt_to_page(usemap); - /* - * Check to see if allocation came from hot-plug-add - */ - if (PageSlab(usemap_page) || PageCompound(usemap_page)) { - kfree(usemap); - if (memmap) - __kfree_section_memmap(memmap, 
PAGES_PER_SECTION); - return; - } - - /* - * The usemap came from bootmem. This is packed with other usemaps - * on the section which has pgdat at boot time. Just keep it as is now. - */ - - if (memmap) { - nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page)) - >> PAGE_SHIFT; - - free_map_bootmem(memmap, nr_pages); - } -} - /* * returns the number of sections whose mem_maps were properly * set. If this is <=0, then that means that the passed-in @@ -800,6 +772,39 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) } #endif +#ifdef CONFIG_MEMORY_HOTREMOVE +static void free_section_usemap(struct page *memmap, unsigned long *usemap) +{ + struct page *usemap_page; + unsigned long nr_pages; + + if (!usemap) + return; + + usemap_page = virt_to_page(usemap); + /* + * Check to see if allocation came from hot-plug-add + */ + if (PageSlab(usemap_page) || PageCompound(usemap_page)) { + kfree(usemap); + if (memmap) + __kfree_section_memmap(memmap, PAGES_PER_SECTION); + return; + } + + /* + * The usemap came from bootmem. This is packed with other usemaps + * on the section which has pgdat at boot time. Just keep it as is now. + */ + + if (memmap) { + nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page)) + >> PAGE_SHIFT; + + free_map_bootmem(memmap, nr_pages); + } +} + void sparse_remove_one_section(struct zone *zone, struct mem_section *ms) { struct page *memmap = NULL; @@ -819,4 +824,5 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms) clear_hwpoisoned_pages(memmap, PAGES_PER_SECTION); free_section_usemap(memmap, usemap); } -#endif +#endif /* CONFIG_MEMORY_HOTREMOVE */ +#endif /* CONFIG_MEMORY_HOTPLUG */ -- cgit v1.2.3