From 172f3fcb17382faafc71091868370b6765da7a43 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 26 Nov 2015 22:12:02 +0000 Subject: ARM: l2c: tauros2: fix OF-enabled non-DT boot Signed-off-by: Russell King --- arch/arm/mm/cache-tauros2.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c index 1e373d268c04..95eb524ce556 100644 --- a/arch/arm/mm/cache-tauros2.c +++ b/arch/arm/mm/cache-tauros2.c @@ -287,16 +287,15 @@ void __init tauros2_init(unsigned int features) node = of_find_matching_node(NULL, tauros2_ids); if (!node) { pr_info("Not found marvell,tauros2-cache, disable it\n"); - return; + } else { + ret = of_property_read_u32(node, "marvell,tauros2-cache-features", &f); + if (ret) { + pr_info("Not found marvell,tauros-cache-features property, " + "disable extra features\n"); + features = 0; + } else + features = f; } - - ret = of_property_read_u32(node, "marvell,tauros2-cache-features", &f); - if (ret) { - pr_info("Not found marvell,tauros-cache-features property, " - "disable extra features\n"); - features = 0; - } else - features = f; #endif tauros2_internal_init(features); } -- cgit v1.2.3 From 1d93ba2aaacc96bef018c5c2e12840f07372a2be Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 26 Nov 2015 22:12:26 +0000 Subject: ARM: l2c: tauros2: use descriptive definitions for register bits Use descriptive definitions for the Tauros2 register bits, and while we're here, clean up the "Tauros2: %s line fill burt8." message. Signed-off-by: Russell King --- arch/arm/mm/cache-tauros2.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c index 95eb524ce556..88255bea65e4 100644 --- a/arch/arm/mm/cache-tauros2.c +++ b/arch/arm/mm/cache-tauros2.c @@ -22,6 +22,11 @@ #include #include +/* CP15 PJ4 Control configuration register */ +#define CCR_L2C_PREFETCH_DISABLE BIT(24) +#define CCR_L2C_ECC_ENABLE BIT(23) +#define CCR_L2C_WAY7_4_DISABLE BIT(21) +#define CCR_L2C_BURST8_ENABLE BIT(20) /* * When Tauros2 is used on a CPU that supports the v7 hierarchical @@ -182,18 +187,18 @@ static void enable_extra_feature(unsigned int features) u = read_extra_features(); if (features & CACHE_TAUROS2_PREFETCH_ON) - u &= ~0x01000000; + u &= ~CCR_L2C_PREFETCH_DISABLE; else - u |= 0x01000000; + u |= CCR_L2C_PREFETCH_DISABLE; pr_info("Tauros2: %s L2 prefetch.\n", (features & CACHE_TAUROS2_PREFETCH_ON) ? "Enabling" : "Disabling"); if (features & CACHE_TAUROS2_LINEFILL_BURST8) - u |= 0x00100000; + u |= CCR_L2C_BURST8_ENABLE; else - u &= ~0x00100000; - pr_info("Tauros2: %s line fill burt8.\n", + u &= ~CCR_L2C_BURST8_ENABLE; + pr_info("Tauros2: %s burst8 line fill.\n", (features & CACHE_TAUROS2_LINEFILL_BURST8) ? "Enabling" : "Disabling"); -- cgit v1.2.3 From f19768ce0e84ac8a1ab03f87784bdf94bf94e140 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 19 Oct 2015 16:00:35 +0100 Subject: ARM: orion: implement ARM delay timer Implement an ARM delay timer to be used for udelay() on orion legacy platforms. This allows us to skip the delay loop calibration at boot. It also means that udelay() will be unaffected by CPU frequency changes when cpufreq is enabled on these platforms. Tested-by: Russell King Acked-by: Andrew Lunn Signed-off-by: Russell King --- arch/arm/plat-orion/time.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch') diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c index 8085a8aac812..ffb93db68e9c 100644 --- a/arch/arm/plat-orion/time.c +++ b/arch/arm/plat-orion/time.c @@ -18,6 +18,7 @@ #include #include #include +#include /* * MBus bridge block registers. @@ -188,6 +189,15 @@ orion_time_set_base(void __iomem *_timer_base) timer_base = _timer_base; } +static unsigned long orion_delay_timer_read(void) +{ + return ~readl(timer_base + TIMER0_VAL_OFF); +} + +static struct delay_timer orion_delay_timer = { + .read_current_timer = orion_delay_timer_read, +}; + void __init orion_time_init(void __iomem *_bridge_base, u32 _bridge_timer1_clr_mask, unsigned int irq, unsigned int tclk) @@ -202,6 +212,9 @@ orion_time_init(void __iomem *_bridge_base, u32 _bridge_timer1_clr_mask, ticks_per_jiffy = (tclk + HZ/2) / HZ; + orion_delay_timer.freq = tclk; + register_current_timer_delay(&orion_delay_timer); + /* * Set scale and timer for sched_clock. */ -- cgit v1.2.3 From d78e13a8a8b795b605eb58a48365591f2e7c690b Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Thu, 7 Jan 2016 16:27:33 +0100 Subject: ARM: 8497/1: initialize cpu_scale to its default Instead of looping through all cpus calling set_capacity_scale, we can initialise cpu_scale per-cpu variables to SCHED_CAPACITY_SCALE with their definition. Acked-by: Vincent Guittot Signed-off-by: Juri Lelli Signed-off-by: Russell King --- arch/arm/kernel/topology.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 08b7847bf912..ec279d161b32 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -40,7 +40,7 @@ * to run the rebalance_domains for all idle cores and the cpu_capacity can be * updated during this sequence. */ -static DEFINE_PER_CPU(unsigned long, cpu_scale); +static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) { @@ -306,8 +306,6 @@ void __init init_cpu_topology(void) cpu_topo->socket_id = -1; cpumask_clear(&cpu_topo->core_sibling); cpumask_clear(&cpu_topo->thread_sibling); - - set_capacity_scale(cpu, SCHED_CAPACITY_SCALE); } smp_wmb(); -- cgit v1.2.3 From 9023cc8268c6ba358417d31112ed96e1feb73e56 Mon Sep 17 00:00:00 2001 From: Andiii Date: Thu, 14 Jan 2016 07:17:00 +0100 Subject: ARM: 8499/1: irq: l2c: do not print error in case of missing l2c from arm: irq: l2c: do not print error in case of missing l2c from dtb In some architectures the L2 cache controller is integrated in the processor's block itself and it doesn't use any external cache controller. This means that an entry in the board's dtb related to the l2c is not necessary. Distinguish between error codes and do not print anything in case l2x0_of_init() doesn't find any L2C DTB entry and returns -ENODEV. This patch mutes the following error message: L2C: failed to init: -19 on boards like odroid-xu4, cortex A7/A15, which don't have external cache controller. Signed-off-by: Andi Shyti Reported-by: Krzysztof Kozlowski Reviewed-by: Krzysztof Kozlowski Tested-by: Krzysztof Kozlowski Signed-off-by: Russell King --- arch/arm/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 1d45320ee125..ece04a457486 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -95,7 +95,7 @@ void __init init_IRQ(void) outer_cache.write_sec = machine_desc->l2c_write_sec; ret = l2x0_of_init(machine_desc->l2c_aux_val, machine_desc->l2c_aux_mask); - if (ret) + if (ret && ret != -ENODEV) pr_err("L2C: failed to init: %d\n", ret); } -- cgit v1.2.3 From 2841029393fad551b49b6de34d44bfa9ef256441 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 11 Jan 2016 17:15:58 +0000 Subject: ARM: make virt_to_idmap() return unsigned long Make virt_to_idmap() return an unsigned long rather than phys_addr_t. Returning phys_addr_t here makes no sense, because the definition of virt_to_idmap() is that it shall return a physical address which maps identically with the virtual address. Since virtual addresses are limited to 32-bit, identity mapped physical addresses are as well. Almost all users already had an implicit narrowing cast to unsigned long so let's make this official and part of this interface. Tested-by: Grygorii Strashko Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 6 +++--- arch/arm/kernel/reboot.c | 2 +- arch/arm/mach-keystone/keystone.c | 2 +- arch/arm/mm/idmap.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index c79b57bf71c4..49bf6b1e2177 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -273,14 +273,14 @@ static inline void *phys_to_virt(phys_addr_t x) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) #define pfn_to_kaddr(pfn) __va((phys_addr_t)(pfn) << PAGE_SHIFT) -extern phys_addr_t (*arch_virt_to_idmap)(unsigned long x); +extern unsigned long (*arch_virt_to_idmap)(unsigned long x); /* * These are for systems that have a hardware interconnect supported alias of * physical memory for idmap purposes. Most cases should leave these - * untouched. + * untouched. Note: this can only return addresses less than 4GiB. */ -static inline phys_addr_t __virt_to_idmap(unsigned long x) +static inline unsigned long __virt_to_idmap(unsigned long x) { if (IS_ENABLED(CONFIG_MMU) && arch_virt_to_idmap) return arch_virt_to_idmap(x); diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c index 38269358fd25..71a2ff9ec490 100644 --- a/arch/arm/kernel/reboot.c +++ b/arch/arm/kernel/reboot.c @@ -50,7 +50,7 @@ static void __soft_restart(void *addr) flush_cache_all(); /* Switch to the identity mapping. */ - phys_reset = (phys_reset_t)(unsigned long)virt_to_idmap(cpu_reset); + phys_reset = (phys_reset_t)virt_to_idmap(cpu_reset); phys_reset((unsigned long)addr); /* Should never get here. */ diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c index c279293f084c..d80879ce4963 100644 --- a/arch/arm/mach-keystone/keystone.c +++ b/arch/arm/mach-keystone/keystone.c @@ -63,7 +63,7 @@ static void __init keystone_init(void) of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); } -static phys_addr_t keystone_virt_to_idmap(unsigned long x) +static unsigned long keystone_virt_to_idmap(unsigned long x) { return (phys_addr_t)(x) - CONFIG_PAGE_OFFSET + KEYSTONE_LOW_PHYS_START; } diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index d65909697165..bd274a05b8ff 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -15,7 +15,7 @@ * page tables. */ pgd_t *idmap_pgd; -phys_addr_t (*arch_virt_to_idmap) (unsigned long x); +unsigned long (*arch_virt_to_idmap)(unsigned long x); #ifdef CONFIG_ARM_LPAE static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, -- cgit v1.2.3 From 4138323eac0b485316e54ad9ce241bac24ddd175 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 11 Jan 2016 17:03:54 +0000 Subject: ARM: use virt_to_idmap() for soft_restart() Code run via soft_restart() is run with the MMU disabled, so we need to pass the identity map physical address rather than the address obtained from virt_to_phys(). Therefore, replace virt_to_phys() with virt_to_idmap() for all callers of soft_restart(). Signed-off-by: Russell King --- arch/arm/kernel/hibernate.c | 4 ++-- arch/arm/kernel/machine_kexec.c | 16 ++++++---------- 2 files changed, 8 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c index a71501ff6f18..b09561a6d06a 100644 --- a/arch/arm/kernel/hibernate.c +++ b/arch/arm/kernel/hibernate.c @@ -62,7 +62,7 @@ static int notrace arch_save_image(unsigned long unused) ret = swsusp_save(); if (ret == 0) - _soft_restart(virt_to_phys(cpu_resume), false); + _soft_restart(virt_to_idmap(cpu_resume), false); return ret; } @@ -87,7 +87,7 @@ static void notrace arch_restore_image(void *unused) for (pbe = restore_pblist; pbe; pbe = pbe->next) copy_page(pbe->orig_address, pbe->address); - _soft_restart(virt_to_phys(cpu_resume), false); + _soft_restart(virt_to_idmap(cpu_resume), false); } static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata; diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 8bf3b7c09888..59fd0e24c56b 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -143,10 +143,8 @@ void (*kexec_reinit)(void); void machine_kexec(struct kimage *image) { - unsigned long page_list; - unsigned long reboot_code_buffer_phys; - unsigned long reboot_entry = (unsigned long)relocate_new_kernel; - unsigned long reboot_entry_phys; + unsigned long page_list, reboot_entry_phys; + void (*reboot_entry)(void); void *reboot_code_buffer; /* @@ -159,9 +157,6 @@ void machine_kexec(struct kimage *image) page_list = image->head & PAGE_MASK; - /* we need both effective and real address here */ - reboot_code_buffer_phys = - page_to_pfn(image->control_code_page) << PAGE_SHIFT; reboot_code_buffer = page_address(image->control_code_page); /* Prepare parameters for reboot_code_buffer*/ @@ -174,10 +169,11 @@ void machine_kexec(struct kimage *image) /* copy our kernel relocation code to the control code page */ reboot_entry = fncpy(reboot_code_buffer, - reboot_entry, + &relocate_new_kernel, relocate_new_kernel_size); - reboot_entry_phys = (unsigned long)reboot_entry + - (reboot_code_buffer_phys - (unsigned long)reboot_code_buffer); + + /* get the identity mapping physical address for the reboot code */ + reboot_entry_phys = virt_to_idmap(reboot_entry); pr_info("Bye!\n"); -- cgit v1.2.3 From 25362dc496edaf17f714c0fecd8b3eb79670207b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 26 Jan 2016 01:19:36 +0100 Subject: ARM: 8501/1: mm: flip priority of CONFIG_DEBUG_RODATA The use of CONFIG_DEBUG_RODATA is generally seen as an essential part of kernel self-protection: http://www.openwall.com/lists/kernel-hardening/2015/11/30/13 Additionally, its name has grown to mean things beyond just rodata. To get ARM closer to this, we ought to rearrange the names of the configs that control how the kernel protects its memory. What was called CONFIG_ARM_KERNMEM_PERMS is realy doing the work that other architectures call CONFIG_DEBUG_RODATA. This redefines CONFIG_DEBUG_RODATA to actually do the bulk of the ROing (and NXing). In the place of the old CONFIG_DEBUG_RODATA, use CONFIG_DEBUG_ALIGN_RODATA, since that's what the option does: adds section alignment for making rodata explicitly NX, as arm does not split the page tables like arm64 does without _ALIGN_RODATA. Also adds human readable names to the sections so I could more easily debug my typos, and makes CONFIG_DEBUG_RODATA default "y" for CPU_V7. Results in /sys/kernel/debug/kernel_page_tables for each config state: # CONFIG_DEBUG_RODATA is not set # CONFIG_DEBUG_ALIGN_RODATA is not set ---[ Kernel Mapping ]--- 0x80000000-0x80900000 9M RW x SHD 0x80900000-0xa0000000 503M RW NX SHD CONFIG_DEBUG_RODATA=y CONFIG_DEBUG_ALIGN_RODATA=y ---[ Kernel Mapping ]--- 0x80000000-0x80100000 1M RW NX SHD 0x80100000-0x80700000 6M ro x SHD 0x80700000-0x80a00000 3M ro NX SHD 0x80a00000-0xa0000000 502M RW NX SHD CONFIG_DEBUG_RODATA=y # CONFIG_DEBUG_ALIGN_RODATA is not set ---[ Kernel Mapping ]--- 0x80000000-0x80100000 1M RW NX SHD 0x80100000-0x80a00000 9M ro x SHD 0x80a00000-0xa0000000 502M RW NX SHD Signed-off-by: Kees Cook Reviewed-by: Laura Abbott Signed-off-by: Russell King --- arch/arm/kernel/vmlinux.lds.S | 10 +++++----- arch/arm/mm/Kconfig | 34 ++++++++++++++++++---------------- arch/arm/mm/init.c | 19 ++++++++++--------- 3 files changed, 33 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 8b60fde5ce48..a6e395c53a48 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -8,7 +8,7 @@ #include #include #include -#ifdef CONFIG_ARM_KERNMEM_PERMS +#ifdef CONFIG_DEBUG_RODATA #include #endif @@ -94,7 +94,7 @@ SECTIONS HEAD_TEXT } -#ifdef CONFIG_ARM_KERNMEM_PERMS +#ifdef CONFIG_DEBUG_RODATA . = ALIGN(1<active_mm); } -#endif /* CONFIG_DEBUG_RODATA */ #else static inline void fix_kernmem_perms(void) { } -#endif /* CONFIG_ARM_KERNMEM_PERMS */ +#endif /* CONFIG_DEBUG_RODATA */ void free_tcmmem(void) { -- cgit v1.2.3 From 73e592f3bc2cdc68df9dbc92e681b61f9bc6c2bf Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 27 Jan 2016 20:14:00 +0100 Subject: ARM: 8504/1: __arch_xprod_64(): small optimization The tmp variable is used twice: first to pose as a register containing a value of zero, and then to provide a temporary register that initially is zero and get added some value. But somehow gcc decides to split those two usages in different registers. Example code: u64 div64const1000(u64 x) { u32 y = 1000; do_div(x, y); return x; } Result: div64const1000: push {r4, r5, r6, r7, lr} mov lr, #0 mov r6, r0 mov r7, r1 adr r5, .L8 ldrd r4, [r5] mov r1, lr umull r2, r3, r4, r6 cmn r2, r4 adcs r3, r3, r5 adc r2, lr, #0 umlal r3, r2, r5, r6 umlal r3, r1, r4, r7 mov r3, #0 adds r2, r1, r2 adc r3, r3, #0 umlal r2, r3, r5, r7 lsr r0, r2, #9 lsr r1, r3, #9 orr r0, r0, r3, lsl #23 pop {r4, r5, r6, r7, pc} .align 3 .L8: .word -1924145349 .word -2095944041 Full kernel build size: text data bss dec hex filename 13663814 1553940 351368 15569122 ed90e2 vmlinux Here the two instances of 'tmp' are assigned to r1 and lr. To avoid that, let's mark the first 'tmp' usage in __arch_xprod_64() with a "+r" constraint even if the register is not written to, so to create a dependency for the second usage with the effect of enforcing a single temporary register throughout. Result: div64const1000: push {r4, r5, r6, r7} movs r3, #0 adr r5, .L8 ldrd r4, [r5] umull r6, r7, r4, r0 cmn r6, r4 adcs r7, r7, r5 adc r6, r3, #0 umlal r7, r6, r5, r0 umlal r7, r3, r4, r1 mov r7, #0 adds r6, r3, r6 adc r7, r7, #0 umlal r6, r7, r5, r1 lsr r0, r6, #9 lsr r1, r7, #9 orr r0, r0, r7, lsl #23 pop {r4, r5, r6, r7} bx lr .align 3 .L8: .word -1924145349 .word -2095944041 text data bss dec hex filename 13663438 1553940 351368 15568746 ed8f6a vmlinux This time 'tmp' is assigned to r3 and used throughout. However, by being assigned to r3, that blocks usage of the r2-r3 double register slot for 64-bit values, forcing more registers to be spilled on the stack. Let's try to help it by forcing 'tmp' to the caller-saved ip register. Result: div64const1000: stmfd sp!, {r4, r5} mov ip, #0 adr r5, .L8 ldrd r4, [r5] umull r2, r3, r4, r0 cmn r2, r4 adcs r3, r3, r5 adc r2, ip, #0 umlal r3, r2, r5, r0 umlal r3, ip, r4, r1 mov r3, #0 adds r2, ip, r2 adc r3, r3, #0 umlal r2, r3, r5, r1 mov r0, r2, lsr #9 mov r1, r3, lsr #9 orr r0, r0, r3, asl #23 ldmfd sp!, {r4, r5} bx lr .align 3 .L8: .word -1924145349 .word -2095944041 text data bss dec hex filename 13662838 1553940 351368 15568146 ed8d12 vmlinux We could make the code marginally smaller yet by forcing 'tmp' to lr instead, but that would have a negative inpact on branch prediction for which "bx lr" is optimal. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/include/asm/div64.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h index e1f07764b0d6..7d919a9b32e5 100644 --- a/arch/arm/include/asm/div64.h +++ b/arch/arm/include/asm/div64.h @@ -74,7 +74,7 @@ static inline uint32_t __div64_32(uint64_t *n, uint32_t base) static inline uint64_t __arch_xprod_64(uint64_t m, uint64_t n, bool bias) { unsigned long long res; - unsigned int tmp = 0; + register unsigned int tmp asm("ip") = 0; if (!bias) { asm ( "umull %Q0, %R0, %Q1, %Q2\n\t" @@ -90,12 +90,12 @@ static inline uint64_t __arch_xprod_64(uint64_t m, uint64_t n, bool bias) : "r" (m), "r" (n) : "cc"); } else { - asm ( "umull %Q0, %R0, %Q1, %Q2\n\t" - "cmn %Q0, %Q1\n\t" - "adcs %R0, %R0, %R1\n\t" - "adc %Q0, %3, #0" - : "=&r" (res) - : "r" (m), "r" (n), "r" (tmp) + asm ( "umull %Q0, %R0, %Q2, %Q3\n\t" + "cmn %Q0, %Q2\n\t" + "adcs %R0, %R0, %R2\n\t" + "adc %Q0, %1, #0" + : "=&r" (res), "+&r" (tmp) + : "r" (m), "r" (n) : "cc"); } -- cgit v1.2.3 From 33298ef6d8ddef57aaa1d11ed53fc08bef2f95aa Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Fri, 29 Jan 2016 23:06:08 +0100 Subject: ARM: 8505/1: dma-mapping: Optimize allocation The __iommu_alloc_buffer() is expected to be called to allocate pretty sizeable buffers. Upon simple tests of video I saw it trying to allocate 4,194,304 bytes. The function tries to allocate large chunks in order to optimize IOMMU TLB usage. The current function is very, very slow. One problem is the way it keeps trying and trying to allocate big chunks. Imagine a very fragmented memory that has 4M free but no contiguous pages at all. Further imagine allocating 4M (1024 pages). We'll do the following memory allocations: - For page 1: - Try to allocate order 10 (no retry) - Try to allocate order 9 (no retry) - ... - Try to allocate order 0 (with retry, but not needed) - For page 2: - Try to allocate order 9 (no retry) - Try to allocate order 8 (no retry) - ... - Try to allocate order 0 (with retry, but not needed) - ... - ... Total number of calls to alloc() calls for this case is: sum(int(math.log(i, 2)) + 1 for i in range(1, 1025)) => 9228 The above is obviously worse case, but given how slow alloc can be we really want to try to avoid even somewhat bad cases. I timed the old code with a device under memory pressure and it wasn't hard to see it take more than 120 seconds to allocate 4 megs of memory! (NOTE: testing was done on kernel 3.14, so possibly mainline would behave differently). A second problem is that allocating big chunks under memory pressure when we don't need them is just not a great idea anyway unless we really need them. We can make due pretty well with smaller chunks so it's probably wise to leave bigger chunks for other users once memory pressure is on. Let's adjust the allocation like this: 1. If a big chunk fails, stop trying to hard and bump down to lower order allocations. 2. Don't try useless orders. The whole point of big chunks is to optimize the TLB and it can really only make use of 2M, 1M, 64K and 4K sizes. We'll still tend to eat up a bunch of big chunks, but that might be the right answer for some users. A future patch could possibly add a new DMA_ATTR that would let the caller decide that TLB optimization isn't important and that we should use smaller chunks. Presumably this would be a sane strategy for some callers. Signed-off-by: Douglas Anderson Acked-by: Marek Szyprowski Reviewed-by: Robin Murphy Reviewed-by: Tomasz Figa Tested-by: Javier Martinez Canillas Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 0eca3812527e..bc9cebfa0891 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1122,6 +1122,9 @@ static inline void __free_iova(struct dma_iommu_mapping *mapping, spin_unlock_irqrestore(&mapping->lock, flags); } +/* We'll try 2M, 1M, 64K, and finally 4K; array must end with 0! */ +static const int iommu_order_array[] = { 9, 8, 4, 0 }; + static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp, struct dma_attrs *attrs) { @@ -1129,6 +1132,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, int count = size >> PAGE_SHIFT; int array_size = count * sizeof(struct page *); int i = 0; + int order_idx = 0; if (array_size <= PAGE_SIZE) pages = kzalloc(array_size, GFP_KERNEL); @@ -1162,22 +1166,24 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, while (count) { int j, order; - for (order = __fls(count); order > 0; --order) { - /* - * We do not want OOM killer to be invoked as long - * as we can fall back to single pages, so we force - * __GFP_NORETRY for orders higher than zero. - */ - pages[i] = alloc_pages(gfp | __GFP_NORETRY, order); - if (pages[i]) - break; + order = iommu_order_array[order_idx]; + + /* Drop down when we get small */ + if (__fls(count) < order) { + order_idx++; + continue; } - if (!pages[i]) { - /* - * Fall back to single page allocation. - * Might invoke OOM killer as last resort. - */ + if (order) { + /* See if it's easy to allocate a high-order chunk */ + pages[i] = alloc_pages(gfp | __GFP_NORETRY, order); + + /* Go down a notch at first sign of pressure */ + if (!pages[i]) { + order_idx++; + continue; + } + } else { pages[i] = alloc_pages(gfp, 0); if (!pages[i]) goto error; -- cgit v1.2.3 From 14d3ae2efeed4ebcc6313fad61470803eb904126 Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Fri, 29 Jan 2016 23:08:46 +0100 Subject: ARM: 8507/1: dma-mapping: Use DMA_ATTR_ALLOC_SINGLE_PAGES hint to optimize alloc If we know that TLB efficiency will not be an issue when memory is accessed then it's not terribly important to allocate big chunks of memory. The whole point of allocating the big chunks was that it would make TLB usage efficient. As Marek Szyprowski indicated: Please note that mapping memory with larger pages significantly improves performance, especially when IOMMU has a little TLB cache. This can be easily observed when multimedia devices do processing of RGB data with 90/270 degree rotation Image rotation is distinctly an operation that needs to bounce around through memory, so it makes sense that TLB efficiency is important there. Video decoding, on the other hand, is a fairly sequential operation. During video decoding it's not expected that we'll be jumping all over memory. Decoding video is also pretty heavy and the TLB misses aren't a huge deal. Presumably most HW video acceleration users of dma-mapping will not care about huge pages and will set DMA_ATTR_ALLOC_SINGLE_PAGES. Allocating big chunks of memory is quite expensive, especially if we're doing it repeadly and memory is full. In one (out of tree) usage model it is common that arm_iommu_alloc_attrs() is called 16 times in a row, each one trying to allocate 4 MB of memory. This is called whenever the system encounters a new video, which could easily happen while the memory system is stressed out. In fact, on certain social media websites that auto-play video and have infinite scrolling, it's quite common to see not just one of these 16x4MB allocations but 2 or 3 right after another. Asking the system even to do a small amount of extra work to give us big chunks in this case is just not a good use of time. Allocating big chunks of memory is also expensive indirectly. Even if we ask the system not to do ANY extra work to allocate _our_ memory, we're still potentially eating up all big chunks in the system. Presumably there are other users in the system that aren't quite as flexible and that actually need these big chunks. By eating all the big chunks we're causing extra work for the rest of the system. We also may start making other memory allocations fail. While the system may be robust to such failures (as is the case with dwc2 USB trying to allocate buffers for Ethernet data and with WiFi trying to allocate buffers for WiFi data), it is yet another big performance hit. Signed-off-by: Douglas Anderson Acked-by: Marek Szyprowski Tested-by: Javier Martinez Canillas Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index bc9cebfa0891..9f996a3d79f7 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1158,6 +1158,10 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, return pages; } + /* Go straight to 4K chunks if caller says it's OK. */ + if (dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs)) + order_idx = ARRAY_SIZE(iommu_order_array) - 1; + /* * IOMMU can map any pages, so himem can also be used here */ -- cgit v1.2.3 From 1b9bdf5c1661873a10e193b8cbb803a87fe5c4a1 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 1 Feb 2016 18:01:29 +0100 Subject: ARM: 8510/1: rework ARM_CPU_SUSPEND dependencies The code enabled by the ARM_CPU_SUSPEND config option is used by kernel subsystems for purposes that go beyond system suspend so its config entry should be augmented to take more default options into account and avoid forcing its selection to prevent dependencies override. To achieve this goal, this patch reworks the ARM_CPU_SUSPEND config entry and updates its default config value (by adding the BL_SWITCHER option to it) and its dependencies (ARCH_SUSPEND_POSSIBLE), so that the symbol is still selected by default by the subsystems requiring it and at the same time enforcing the dependencies correctly. Signed-off-by: Lorenzo Pieralisi Cc: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 4f799e567fc8..8456f69bc77d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1337,7 +1337,6 @@ config BIG_LITTLE config BL_SWITCHER bool "big.LITTLE switcher support" depends on BIG_LITTLE && MCPM && HOTPLUG_CPU && ARM_GIC - select ARM_CPU_SUSPEND select CPU_PM help The big.LITTLE "switcher" provides the core functionality to @@ -2111,7 +2110,8 @@ config ARCH_SUSPEND_POSSIBLE def_bool y config ARM_CPU_SUSPEND - def_bool PM_SLEEP + def_bool PM_SLEEP || BL_SWITCHER + depends on ARCH_SUSPEND_POSSIBLE config ARCH_HIBERNATION_POSSIBLE bool -- cgit v1.2.3 From 8b6f2499ac45d5a0ab2e4b6f9613ab3f60416be1 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 1 Feb 2016 18:01:30 +0100 Subject: ARM: 8511/1: ARM64: kernel: PSCI: move PSCI idle management code to drivers/firmware ARM64 PSCI kernel interfaces that initialize idle states and implement the suspend API to enter them are generic and can be shared with the ARM architecture. To achieve that goal, this patch moves ARM64 PSCI idle management code to drivers/firmware, so that the interface to initialize and enter idle states can actually be shared by ARM and ARM64 arches back-ends. The ARM generic CPUidle implementation also requires the definition of a cpuidle_ops section entry for the kernel to initialize the CPUidle operations at boot based on the enable-method (ie ARM64 has the statically initialized cpu_ops counterparts for that purpose); therefore this patch also adds the required section entry on CONFIG_ARM for PSCI so that the kernel can initialize the PSCI CPUidle back-end when PSCI is the probed enable-method. On ARM64 this patch provides no functional change. Signed-off-by: Lorenzo Pieralisi Acked-by: Daniel Lezcano Acked-by: Catalin Marinas [arch/arm64] Acked-by: Mark Rutland Tested-by: Jisheng Zhang Cc: Will Deacon Cc: Sudeep Holla Cc: Daniel Lezcano Cc: Catalin Marinas Cc: Mark Rutland Cc: Jisheng Zhang Signed-off-by: Russell King --- arch/arm/Kconfig | 2 +- arch/arm64/kernel/psci.c | 99 +------------------------------------- drivers/firmware/psci.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/psci.h | 3 ++ 4 files changed, 126 insertions(+), 98 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 8456f69bc77d..cc95ff8f07cb 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2110,7 +2110,7 @@ config ARCH_SUSPEND_POSSIBLE def_bool y config ARM_CPU_SUSPEND - def_bool PM_SLEEP || BL_SWITCHER + def_bool PM_SLEEP || BL_SWITCHER || ARM_PSCI_FW depends on ARCH_SUSPEND_POSSIBLE config ARCH_HIBERNATION_POSSIBLE diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index f67f35b6edb1..42816bebb1e0 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -20,7 +20,6 @@ #include #include #include -#include #include @@ -28,73 +27,6 @@ #include #include #include -#include - -static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state); - -static int __maybe_unused cpu_psci_cpu_init_idle(unsigned int cpu) -{ - int i, ret, count = 0; - u32 *psci_states; - struct device_node *state_node, *cpu_node; - - cpu_node = of_get_cpu_node(cpu, NULL); - if (!cpu_node) - return -ENODEV; - - /* - * If the PSCI cpu_suspend function hook has not been initialized - * idle states must not be enabled, so bail out - */ - if (!psci_ops.cpu_suspend) - return -EOPNOTSUPP; - - /* Count idle states */ - while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states", - count))) { - count++; - of_node_put(state_node); - } - - if (!count) - return -ENODEV; - - psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL); - if (!psci_states) - return -ENOMEM; - - for (i = 0; i < count; i++) { - u32 state; - - state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i); - - ret = of_property_read_u32(state_node, - "arm,psci-suspend-param", - &state); - if (ret) { - pr_warn(" * %s missing arm,psci-suspend-param property\n", - state_node->full_name); - of_node_put(state_node); - goto free_mem; - } - - of_node_put(state_node); - pr_debug("psci-power-state %#x index %d\n", state, i); - if (!psci_power_state_is_valid(state)) { - pr_warn("Invalid PSCI power state %#x\n", state); - ret = -EINVAL; - goto free_mem; - } - psci_states[i] = state; - } - /* Idle states parsed correctly, initialize per-cpu pointer */ - per_cpu(psci_power_state, cpu) = psci_states; - return 0; - -free_mem: - kfree(psci_states); - return ret; -} static int __init cpu_psci_cpu_init(unsigned int cpu) { @@ -178,38 +110,11 @@ static int cpu_psci_cpu_kill(unsigned int cpu) } #endif -static int psci_suspend_finisher(unsigned long index) -{ - u32 *state = __this_cpu_read(psci_power_state); - - return psci_ops.cpu_suspend(state[index - 1], - virt_to_phys(cpu_resume)); -} - -static int __maybe_unused cpu_psci_cpu_suspend(unsigned long index) -{ - int ret; - u32 *state = __this_cpu_read(psci_power_state); - /* - * idle state index 0 corresponds to wfi, should never be called - * from the cpu_suspend operations - */ - if (WARN_ON_ONCE(!index)) - return -EINVAL; - - if (!psci_power_state_loses_context(state[index - 1])) - ret = psci_ops.cpu_suspend(state[index - 1], 0); - else - ret = cpu_suspend(index, psci_suspend_finisher); - - return ret; -} - const struct cpu_operations cpu_psci_ops = { .name = "psci", #ifdef CONFIG_CPU_IDLE - .cpu_init_idle = cpu_psci_cpu_init_idle, - .cpu_suspend = cpu_psci_cpu_suspend, + .cpu_init_idle = psci_cpu_init_idle, + .cpu_suspend = psci_cpu_suspend_enter, #endif .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c index f25cd79c8a79..11bfee8b79a9 100644 --- a/drivers/firmware/psci.c +++ b/drivers/firmware/psci.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) "psci: " fmt #include +#include #include #include #include @@ -21,10 +22,12 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -244,6 +247,123 @@ static int __init psci_features(u32 psci_func_id) psci_func_id, 0, 0); } +#ifdef CONFIG_CPU_IDLE +static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state); + +static int psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu) +{ + int i, ret, count = 0; + u32 *psci_states; + struct device_node *state_node; + + /* + * If the PSCI cpu_suspend function hook has not been initialized + * idle states must not be enabled, so bail out + */ + if (!psci_ops.cpu_suspend) + return -EOPNOTSUPP; + + /* Count idle states */ + while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states", + count))) { + count++; + of_node_put(state_node); + } + + if (!count) + return -ENODEV; + + psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL); + if (!psci_states) + return -ENOMEM; + + for (i = 0; i < count; i++) { + u32 state; + + state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i); + + ret = of_property_read_u32(state_node, + "arm,psci-suspend-param", + &state); + if (ret) { + pr_warn(" * %s missing arm,psci-suspend-param property\n", + state_node->full_name); + of_node_put(state_node); + goto free_mem; + } + + of_node_put(state_node); + pr_debug("psci-power-state %#x index %d\n", state, i); + if (!psci_power_state_is_valid(state)) { + pr_warn("Invalid PSCI power state %#x\n", state); + ret = -EINVAL; + goto free_mem; + } + psci_states[i] = state; + } + /* Idle states parsed correctly, initialize per-cpu pointer */ + per_cpu(psci_power_state, cpu) = psci_states; + return 0; + +free_mem: + kfree(psci_states); + return ret; +} + +int psci_cpu_init_idle(unsigned int cpu) +{ + struct device_node *cpu_node; + int ret; + + cpu_node = of_get_cpu_node(cpu, NULL); + if (!cpu_node) + return -ENODEV; + + ret = psci_dt_cpu_init_idle(cpu_node, cpu); + + of_node_put(cpu_node); + + return ret; +} + +static int psci_suspend_finisher(unsigned long index) +{ + u32 *state = __this_cpu_read(psci_power_state); + + return psci_ops.cpu_suspend(state[index - 1], + virt_to_phys(cpu_resume)); +} + +int psci_cpu_suspend_enter(unsigned long index) +{ + int ret; + u32 *state = __this_cpu_read(psci_power_state); + /* + * idle state index 0 corresponds to wfi, should never be called + * from the cpu_suspend operations + */ + if (WARN_ON_ONCE(!index)) + return -EINVAL; + + if (!psci_power_state_loses_context(state[index - 1])) + ret = psci_ops.cpu_suspend(state[index - 1], 0); + else + ret = cpu_suspend(index, psci_suspend_finisher); + + return ret; +} + +/* ARM specific CPU idle operations */ +#ifdef CONFIG_ARM +static struct cpuidle_ops psci_cpuidle_ops __initdata = { + .suspend = psci_cpu_suspend_enter, + .init = psci_dt_cpu_init_idle, +}; + +CPUIDLE_METHOD_OF_DECLARE(psci, "arm,psci", &psci_cpuidle_ops); +#endif +#endif + static int psci_system_suspend(unsigned long unused) { return invoke_psci_fn(PSCI_FN_NATIVE(1_0, SYSTEM_SUSPEND), diff --git a/include/linux/psci.h b/include/linux/psci.h index 12c4865457ad..393efe2edf9a 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -24,6 +24,9 @@ bool psci_tos_resident_on(int cpu); bool psci_power_state_loses_context(u32 state); bool psci_power_state_is_valid(u32 state); +int psci_cpu_init_idle(unsigned int cpu); +int psci_cpu_suspend_enter(unsigned long index); + struct psci_operations { int (*cpu_suspend)(u32 state, unsigned long entry_point); int (*cpu_off)(u32 state); -- cgit v1.2.3 From 538bf4694898b19e76f32d554cc16135cf37b51c Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Wed, 3 Feb 2016 15:58:10 +0100 Subject: ARM: 8513/1: xip: Move XIP linking to a separate file When building an XIP kernel, the linker script needs to be much different than a conventional kernel's script. Over time, it's been difficult to maintain both XIP and non-XIP layouts in one linker script. Therefore, this patch separates the two procedures into two completely different files. The new linker script is essentially a straight copy of the current script with all the non-CONFIG_XIP_KERNEL portions removed. Additionally, all CONFIG_XIP_KERNEL portions have been removed from the existing linker script...never to return again. It should be noted that this does not fix any current XIP issues, but rather is the first move in fixing them properly with subsequent patches. Signed-off-by: Chris Brandt Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/vmlinux-xip.lds.S | 322 ++++++++++++++++++++++++++++++++++++++ arch/arm/kernel/vmlinux.lds.S | 37 ++--- 2 files changed, 332 insertions(+), 27 deletions(-) create mode 100644 arch/arm/kernel/vmlinux-xip.lds.S (limited to 'arch') diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S new file mode 100644 index 000000000000..6f59ef290289 --- /dev/null +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -0,0 +1,322 @@ +/* ld script to make ARM Linux kernel + * taken from the i386 version by Russell King + * Written by Martin Mares + */ + +#include +#include +#include +#include +#include +#ifdef CONFIG_DEBUG_RODATA +#include +#endif + +#define PROC_INFO \ + . = ALIGN(4); \ + VMLINUX_SYMBOL(__proc_info_begin) = .; \ + *(.proc.info.init) \ + VMLINUX_SYMBOL(__proc_info_end) = .; + +#define IDMAP_TEXT \ + ALIGN_FUNCTION(); \ + VMLINUX_SYMBOL(__idmap_text_start) = .; \ + *(.idmap.text) \ + VMLINUX_SYMBOL(__idmap_text_end) = .; \ + . = ALIGN(PAGE_SIZE); \ + VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ + *(.hyp.idmap.text) \ + VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; + +#ifdef CONFIG_HOTPLUG_CPU +#define ARM_CPU_DISCARD(x) +#define ARM_CPU_KEEP(x) x +#else +#define ARM_CPU_DISCARD(x) x +#define ARM_CPU_KEEP(x) +#endif + +#if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \ + defined(CONFIG_GENERIC_BUG) +#define ARM_EXIT_KEEP(x) x +#define ARM_EXIT_DISCARD(x) +#else +#define ARM_EXIT_KEEP(x) +#define ARM_EXIT_DISCARD(x) x +#endif + +OUTPUT_ARCH(arm) +ENTRY(stext) + +#ifndef __ARMEB__ +jiffies = jiffies_64; +#else +jiffies = jiffies_64 + 4; +#endif + +SECTIONS +{ + /* + * XXX: The linker does not define how output sections are + * assigned to input sections when there are multiple statements + * matching the same input section name. There is no documented + * order of matching. + * + * unwind exit sections must be discarded before the rest of the + * unwind sections get included. + */ + /DISCARD/ : { + *(.ARM.exidx.exit.text) + *(.ARM.extab.exit.text) + ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text)) + ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text)) + ARM_EXIT_DISCARD(EXIT_TEXT) + ARM_EXIT_DISCARD(EXIT_DATA) + EXIT_CALL +#ifndef CONFIG_MMU + *(.text.fixup) + *(__ex_table) +#endif +#ifndef CONFIG_SMP_ON_UP + *(.alt.smp.init) +#endif + *(.discard) + *(.discard.*) + } + + . = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR); + + .head.text : { + _text = .; + HEAD_TEXT + } + +#ifdef CONFIG_DEBUG_RODATA + . = ALIGN(1< */ +#ifdef CONFIG_XIP_KERNEL +#include "vmlinux-xip.lds.S" +#else + #include #include #include @@ -84,11 +88,7 @@ SECTIONS *(.discard.*) } -#ifdef CONFIG_XIP_KERNEL - . = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR); -#else . = PAGE_OFFSET + TEXT_OFFSET; -#endif .head.text : { _text = .; HEAD_TEXT @@ -152,14 +152,13 @@ SECTIONS _etext = .; /* End of text and rodata section */ -#ifndef CONFIG_XIP_KERNEL -# ifdef CONFIG_DEBUG_RODATA +#ifdef CONFIG_DEBUG_RODATA . = ALIGN(1< Date: Fri, 5 Feb 2016 10:04:47 +0100 Subject: ARM: 8514/1: remove duplicate definitions of __vectors_start and __stubs_start Commit b9b32bf70f2f ("ARM: use linker magic for vectors and vector stubs") introduced new global definitions of __vectors_start and __stubs_start, and changed the existing ones to have internal linkage only. However, these symbols are still visible to kallsyms, and due to the way the .vectors and .stubs sections are emitted at the base of the VMA space, these duplicate definitions have conflicting values. $ nm -n vmlinux |grep -E __vectors|__stubs 00000000 t __vectors_start 00001000 t __stubs_start c0e77000 T __vectors_start c0e77020 T __stubs_start This is completely harmless by itself, since the wrong values are local symbols that cannot be referenced by other object files directly. However, since these symbols are also listed in the kallsyms symbol table in some cases (i.e., CONFIG_KALLSYMS_ALL=y and CONFIG_XIP_KERNEL=y), having these conflicting values can be confusing. So either remove them, or make them strictly local. Acked-by: Chris Brandt Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 3ce377f7251f..788e40c1254f 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1064,7 +1064,6 @@ ENDPROC(vector_\name) .endm .section .stubs, "ax", %progbits -__stubs_start: @ This must be the first word .word vector_swi @@ -1206,10 +1205,10 @@ vector_addrexcptn: .equ vector_fiq_offset, vector_fiq .section .vectors, "ax", %progbits -__vectors_start: +.L__vectors_start: W(b) vector_rst W(b) vector_und - W(ldr) pc, __vectors_start + 0x1000 + W(ldr) pc, .L__vectors_start + 0x1000 W(b) vector_pabt W(b) vector_dabt W(b) vector_addrexcptn -- cgit v1.2.3 From 31b96cae5ca2e751ce3dcf4839c2398a1e8b8fc3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 10 Feb 2016 11:41:08 +0100 Subject: ARM: 8515/2: move .vectors and .stubs sections back into the kernel VMA Commit b9b32bf70f2f ("ARM: use linker magic for vectors and vector stubs") updated the linker script to emit the .vectors and .stubs sections into a VMA range that is zero based and disjoint from the normal static kernel region. The reason for that was that this way, the sections can be placed exactly 4 KB apart, while the payload of the .vectors section is only 32 bytes. Since the symbols that are part of the .stubs section are emitted into the kallsyms table, they appear with zero based addresses as well, e.g., 00001004 t vector_rst 00001020 t vector_irq 000010a0 t vector_dabt 00001120 t vector_pabt 000011a0 t vector_und 00001220 t vector_addrexcptn 00001240 t vector_fiq 00001240 T vector_fiq_offset As this confuses perf when it accesses the kallsyms tables, commit 7122c3e9154b ("scripts/link-vmlinux.sh: only filter kernel symbols for arm") implemented a somewhat ugly special case for ARM, where the value of CONFIG_PAGE_OFFSET is passed to scripts/kallsyms, and symbols whose addresses are below it are filtered out. Note that this special case only applies to CONFIG_XIP_KERNEL=n, not because the issue the patch addresses exists only in that case, but because finding a limit below which to apply the filtering is not entirely straightforward. Since the .vectors and .stubs sections contain position independent code that is never executed in place, we can emit it at its most likely runtime VMA (for more recent CPUs), which is 0xffff0000 for the vector table and 0xffff1000 for the stubs. Not only does this fix the perf issue with kallsyms, allowing us to drop the special case in scripts/kallsyms entirely, it also gives debuggers a more realistic view of the address space, and setting breakpoints or single stepping through code in the vector table or the stubs is more likely to work as expected on CPUs that use a high vector address. E.g., 00001240 A vector_fiq_offset ... c0c35000 T __init_begin c0c35000 T __vectors_start c0c35020 T __stubs_start c0c35020 T __vectors_end c0c352e0 T _sinittext c0c352e0 T __stubs_end ... ffff1004 t vector_rst ffff1020 t vector_irq ffff10a0 t vector_dabt ffff1120 t vector_pabt ffff11a0 t vector_und ffff1220 t vector_addrexcptn ffff1240 T vector_fiq (Note that vector_fiq_offset is now an absolute symbol, which kallsyms already ignores by default) The LMA footprint is identical with or without this change, only the VMAs are different: Before: Idx Name Size VMA LMA File off Algn ... 14 .notes 00000024 c0c34020 c0c34020 00a34020 2**2 CONTENTS, ALLOC, LOAD, READONLY, CODE 15 .vectors 00000020 00000000 c0c35000 00a40000 2**1 CONTENTS, ALLOC, LOAD, READONLY, CODE 16 .stubs 000002c0 00001000 c0c35020 00a41000 2**5 CONTENTS, ALLOC, LOAD, READONLY, CODE 17 .init.text 0006b1b8 c0c352e0 c0c352e0 00a452e0 2**5 CONTENTS, ALLOC, LOAD, READONLY, CODE ... After: Idx Name Size VMA LMA File off Algn ... 14 .notes 00000024 c0c34020 c0c34020 00a34020 2**2 CONTENTS, ALLOC, LOAD, READONLY, CODE 15 .vectors 00000020 ffff0000 c0c35000 00a40000 2**1 CONTENTS, ALLOC, LOAD, READONLY, CODE 16 .stubs 000002c0 ffff1000 c0c35020 00a41000 2**5 CONTENTS, ALLOC, LOAD, READONLY, CODE 17 .init.text 0006b1b8 c0c352e0 c0c352e0 00a452e0 2**5 CONTENTS, ALLOC, LOAD, READONLY, CODE ... Acked-by: Nicolas Pitre Acked-by: Chris Brandt Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 3 +-- arch/arm/kernel/vmlinux-xip.lds.S | 6 ++++-- arch/arm/kernel/vmlinux.lds.S | 6 ++++-- 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 788e40c1254f..e2550500486d 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1201,8 +1201,7 @@ vector_addrexcptn: .long __fiq_svc @ e .long __fiq_svc @ f - .globl vector_fiq_offset - .equ vector_fiq_offset, vector_fiq + .globl vector_fiq .section .vectors, "ax", %progbits .L__vectors_start: diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 6f59ef290289..7b1ded6fb628 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -154,19 +154,21 @@ SECTIONS * only thing that matters is their relative offsets */ __vectors_start = .; - .vectors 0 : AT(__vectors_start) { + .vectors 0xffff0000 : AT(__vectors_start) { *(.vectors) } . = __vectors_start + SIZEOF(.vectors); __vectors_end = .; __stubs_start = .; - .stubs 0x1000 : AT(__stubs_start) { + .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) { *(.stubs) } . = __stubs_start + SIZEOF(.stubs); __stubs_end = .; + PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors)); + INIT_TEXT_SECTION(8) .exit.text : { ARM_EXIT_KEEP(EXIT_TEXT) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index cdc84693091b..e087a2ed112b 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -164,19 +164,21 @@ SECTIONS * only thing that matters is their relative offsets */ __vectors_start = .; - .vectors 0 : AT(__vectors_start) { + .vectors 0xffff0000 : AT(__vectors_start) { *(.vectors) } . = __vectors_start + SIZEOF(.vectors); __vectors_end = .; __stubs_start = .; - .stubs 0x1000 : AT(__stubs_start) { + .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) { *(.stubs) } . = __stubs_start + SIZEOF(.stubs); __stubs_end = .; + PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors)); + INIT_TEXT_SECTION(8) .exit.text : { ARM_EXIT_KEEP(EXIT_TEXT) -- cgit v1.2.3 From 02afa9a87b232bca15bc30808b9310c6388ca1a8 Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Tue, 9 Feb 2016 19:34:43 +0100 Subject: ARM: 8518/1: Use correct symbols for XIP_KERNEL For an XIP build, _etext does not represent the end of the binary image that needs to stay mapped into the MODULES_VADDR area. Years ago, data came before text in the memory map. However, now that the order is text/init/data, an XIP_KERNEL needs to map up to the data location in order to keep from cutting off parts of the kernel that are needed. We only map up to the beginning of data because data has already been copied, so there's no reason to keep it around anymore. A new symbol is created to make it clear what it is we are referring to. This fixes the bug where you might lose the end of your kernel area after page table setup is complete. Signed-off-by: Chris Brandt Signed-off-by: Russell King --- arch/arm/include/asm/Kbuild | 1 - arch/arm/include/asm/sections.h | 8 ++++++++ arch/arm/kernel/module.c | 2 +- arch/arm/kernel/vmlinux-xip.lds.S | 2 ++ arch/arm/mm/mmu.c | 8 ++++++-- 5 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 arch/arm/include/asm/sections.h (limited to 'arch') diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 16da6380eb85..3f6616b472af 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -23,7 +23,6 @@ generic-y += preempt.h generic-y += resource.h generic-y += rwsem.h generic-y += seccomp.h -generic-y += sections.h generic-y += segment.h generic-y += sembuf.h generic-y += serial.h diff --git a/arch/arm/include/asm/sections.h b/arch/arm/include/asm/sections.h new file mode 100644 index 000000000000..803bbf2b20b8 --- /dev/null +++ b/arch/arm/include/asm/sections.h @@ -0,0 +1,8 @@ +#ifndef _ASM_ARM_SECTIONS_H +#define _ASM_ARM_SECTIONS_H + +#include + +extern char _exiprom[]; + +#endif /* _ASM_ARM_SECTIONS_H */ diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index efdddcb97dd1..4f14b5ce6535 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -34,7 +34,7 @@ * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off. */ #undef MODULES_VADDR -#define MODULES_VADDR (((unsigned long)_etext + ~PMD_MASK) & PMD_MASK) +#define MODULES_VADDR (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK) #endif #ifdef CONFIG_MMU diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 7b1ded6fb628..40bc4cadb959 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -85,6 +85,7 @@ SECTIONS } . = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR); + _xiprom = .; /* XIP ROM area to be mapped */ .head.text : { _text = .; @@ -210,6 +211,7 @@ SECTIONS PERCPU_SECTION(L1_CACHE_BYTES) #endif + _exiprom = .; /* End of XIP ROM area */ __data_loc = ALIGN(4); /* location in binary */ . = PAGE_OFFSET + TEXT_OFFSET; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 434d76f0b363..e4b681aafd6d 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1253,7 +1253,7 @@ static inline void prepare_page_table(void) #ifdef CONFIG_XIP_KERNEL /* The XIP kernel is mapped in the module area -- skip over it */ - addr = ((unsigned long)_etext + PMD_SIZE - 1) & PMD_MASK; + addr = ((unsigned long)_exiprom + PMD_SIZE - 1) & PMD_MASK; #endif for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); @@ -1335,7 +1335,7 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) #ifdef CONFIG_XIP_KERNEL map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK); map.virtual = MODULES_VADDR; - map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK; + map.length = ((unsigned long)_exiprom - map.virtual + ~SECTION_MASK) & SECTION_MASK; map.type = MT_ROM; create_mapping(&map); #endif @@ -1426,7 +1426,11 @@ static void __init kmap_init(void) static void __init map_lowmem(void) { struct memblock_region *reg; +#ifdef CONFIG_XIP_KERNEL + phys_addr_t kernel_x_start = round_down(__pa(_sdata), SECTION_SIZE); +#else phys_addr_t kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); +#endif phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); /* Map all the lowmem memory banks. */ -- cgit v1.2.3 From 64ac2e74f0b21505606faf725cb5633d63b8b728 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 26 Jan 2016 01:20:21 +0100 Subject: ARM: 8502/1: mm: mark section-aligned portion of rodata NX When rodata is large enough that it crosses a section boundary after the kernel text, mark the rest NX. This is as close to full NX of rodata as we can get without splitting page tables or doing section alignment via CONFIG_DEBUG_ALIGN_RODATA. When the config is: CONFIG_DEBUG_RODATA=y # CONFIG_DEBUG_ALIGN_RODATA is not set Before: ---[ Kernel Mapping ]--- 0x80000000-0x80100000 1M RW NX SHD 0x80100000-0x80a00000 9M ro x SHD 0x80a00000-0xa0000000 502M RW NX SHD After: ---[ Kernel Mapping ]--- 0x80000000-0x80100000 1M RW NX SHD 0x80100000-0x80700000 6M ro x SHD 0x80700000-0x80a00000 3M ro NX SHD 0x80a00000-0xa0000000 502M RW NX SHD Signed-off-by: Kees Cook Reviewed-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/kernel/vmlinux.lds.S | 9 +++++++-- arch/arm/mm/init.c | 7 ++++--- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index e087a2ed112b..fdca231e150a 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -12,9 +12,7 @@ #include #include #include -#ifdef CONFIG_DEBUG_RODATA #include -#endif #define PROC_INFO \ . = ALIGN(4); \ @@ -319,6 +317,13 @@ SECTIONS STABS_DEBUG } +/* + * Without CONFIG_DEBUG_ALIGN_RODATA, __start_rodata_section_aligned will + * be the first section-aligned location after __start_rodata. Otherwise, + * it will be equal to __start_rodata. + */ +__start_rodata_section_aligned = ALIGN(__start_rodata, 1 << SECTION_SHIFT); + /* * These must never be empty * If you have to comment these two assert statements out, your diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 53f42508025b..370581aeb871 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -582,6 +582,9 @@ struct section_perm { pmdval_t clear; }; +/* First section-aligned location at or after __start_rodata. */ +extern char __start_rodata_section_aligned[]; + static struct section_perm nx_perms[] = { /* Make pages tables, etc before _stext RW (set NX). */ { @@ -599,16 +602,14 @@ static struct section_perm nx_perms[] = { .mask = ~PMD_SECT_XN, .prot = PMD_SECT_XN, }, -#ifdef CONFIG_DEBUG_ALIGN_RODATA /* Make rodata NX (set RO in ro_perms below). */ { .name = "rodata NX", - .start = (unsigned long)__start_rodata, + .start = (unsigned long)__start_rodata_section_aligned, .end = (unsigned long)__init_begin, .mask = ~PMD_SECT_XN, .prot = PMD_SECT_XN, }, -#endif }; static struct section_perm ro_perms[] = { -- cgit v1.2.3 From 17f29d36e4732b91ae299a163d34ed1954500f42 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 15 Feb 2016 09:24:05 +0100 Subject: ARM: 8523/1: sa1111: ensure no negative value gets returned on positive match This patch ensures that existing bus match callbacks don't return negative values (which might be interpreted as potential errors in the future) in case of positive match. Signed-off-by: Marek Szyprowski Signed-off-by: Russell King --- arch/arm/common/sa1111.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 3d224941b541..fb0a0a4dfea4 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -1290,7 +1290,7 @@ static int sa1111_match(struct device *_dev, struct device_driver *_drv) struct sa1111_dev *dev = SA1111_DEV(_dev); struct sa1111_driver *drv = SA1111_DRV(_drv); - return dev->devid & drv->devid; + return !!(dev->devid & drv->devid); } static int sa1111_bus_suspend(struct device *dev, pm_message_t state) -- cgit v1.2.3 From e59941b9b3817c9b1dd6662d903150fe3f3c3b0b Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 15 Feb 2016 20:16:24 +0100 Subject: ARM: 8527/1: virt: enable GICv3 system registers ARMv8 introduces system registers for the Generic Interrupt Controllers CPU and virtual interfaces. When GICv3 is implemented, EL2 needs to allow the kernel to use those registers, by changing the value of ICC_HSRE. Signed-off-by: Jean-Philippe Brucker Reviewed-by: Marc Zyngier Signed-off-by: Russell King --- arch/arm/kernel/hyp-stub.S | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch') diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 2a55373f49bf..0b1e4a93d67e 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -17,6 +17,7 @@ */ #include +#include #include #include #include @@ -159,6 +160,29 @@ ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE bic r7, #1 @ Clear ENABLE mcr p15, 0, r7, c14, c3, 1 @ CNTV_CTL 1: +#endif + +#ifdef CONFIG_ARM_GIC_V3 + @ Check whether GICv3 system registers are available + mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 + ubfx r7, r7, #28, #4 + cmp r7, #1 + bne 2f + + @ Enable system register accesses + mrc p15, 4, r7, c12, c9, 5 @ ICC_HSRE + orr r7, r7, #(ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE) + mcr p15, 4, r7, c12, c9, 5 @ ICC_HSRE + isb + + @ SRE bit could be forced to 0 by firmware. + @ Check whether it sticks before accessing any other sysreg + mrc p15, 4, r7, c12, c9, 5 @ ICC_HSRE + tst r7, #ICC_SRE_EL2_SRE + beq 2f + mov r7, #0 + mcr p15, 4, r7, c12, c11, 0 @ ICH_HCR +2: #endif bx lr @ The boot CPU mode is left in r4. -- cgit v1.2.3 From 38c81fca9ee4641937b270cb9c50f96881ba411e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 16 Feb 2016 03:19:52 +0100 Subject: ARM: 8528/1: drop redundant "PHONY += FORCE" "PHONY += FORCE" is already cared by scripts/Makefile.build, which this file is included from. Signed-off-by: Masahiro Yamada Signed-off-by: Russell King --- arch/arm/boot/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index 9eca7aee927f..ba73a204e8b6 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -88,7 +88,7 @@ $(obj)/bootpImage: $(obj)/bootp/bootp FORCE $(call if_changed,objcopy) @$(kecho) ' Kernel: $@ is ready' -PHONY += initrd FORCE +PHONY += initrd initrd: @test "$(INITRD_PHYS)" != "" || \ (echo This machine does not support INITRD; exit -1) -- cgit v1.2.3 From a34c66357e1328fef6388ef6e10ce67275f5f5fe Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 16 Feb 2016 03:27:34 +0100 Subject: ARM: 8529/1: remove 'i' and 'zi' targets These two targets were introduced by commit 13d5fadf45d1 ("[ARM] Make 'i' and 'zi' targets work") to short-circuit the dependencies for 'install' and 'zinstall'. After that, commit 19514fc665ff ('arm, kbuild: make "make install" not depend on vmlinux') eventually made "(z)install" equivalent to "(z)i". It is true that 'i' and 'zi' might be still useful as shorthands but the original intention had been already lost. They do not even show up in "make ARCH=arm help", so I hope this deletion does not have much impact. Signed-off-by: Masahiro Yamada Signed-off-by: Russell King --- arch/arm/Makefile | 1 - arch/arm/boot/Makefile | 8 -------- 2 files changed, 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/Makefile b/arch/arm/Makefile index fe254108d1d9..46fb1cac2698 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -352,7 +352,6 @@ archclean: # My testing targets (bypasses dependencies) bp:; $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/bootpImage -i zi:; $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@ define archhelp diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index ba73a204e8b6..48fab15cfc02 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -107,12 +107,4 @@ uinstall: $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ $(obj)/uImage System.map "$(INSTALL_PATH)" -zi: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ - $(obj)/zImage System.map "$(INSTALL_PATH)" - -i: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ - $(obj)/Image System.map "$(INSTALL_PATH)" - subdir- := bootp compressed dts -- cgit v1.2.3 From db57f88e4ccbbb6dd194b3f3088b3dc1987db423 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Mon, 10 Aug 2015 20:53:11 +0100 Subject: ARM: 8411/1: Add default SPARSEMEM settings We can still override these settings via mach/memory.h, but let's provide sensible defaults so that SPARSEMEM is available in the multiplatform kernels. Two platforms currently use SECTION_SIZE_BITS < 28, but are expected to work with 28 (albeit slightly less efficiently if not all banks are populated): - mach-rpc: uses 26 bits. Based on mach/hardware.h it looks like this platform puts RAM at 0x1000_0000 - 0x1fff_ffff, and I/O below 0x1000_0000. - mach-sa1100: uses 27 bits. mach/memory.h indicates that RAM occupies the entire range of 0xc000_0000 - 0xdfff_ffff. But Arnd says in that rpc and sa1100 will never have to use the default since they cannot be part of a multiplatform kernel, and that is unlikely to change. Several platforms need MAX_PHYSMEM_BITS >= 36 so we'll pick that as the minimum. Anything higher and we'll fail the SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH test in . Some analysis from Russell King at http://lists.infradead.org/pipermail/linux-arm-kernel/2014-October/298957.html: I think this is fine in as far as it goes - this means we end up with 256 entries in the mem_section array which means it occupies one page, which I think is acceptable overhead. The other thing to be aware of here is the obvious: #if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS #error Allocator MAX_ORDER exceeds SECTION_SIZE #endif Which means that with 28 bits of section, that's a maximum allocator order of 16. We appear to allow FORCE_MAX_ZONEORDER to be set up to 64 in the case of shmobile, which doesn't seem like a sensible upper limit - and certainly isn't when sparsemem is enabled. Given this, I think that FORCE_MAX_ZONEORDER's help, and the dependencies probably could do with some improvement to make the issues more transparent. [gregory.0xf0: added notes from Arnd and Russell] Signed-off-by: Kevin Cernekee Acked-by: Arnd Bergmann Tested-by: Stephen Boyd Signed-off-by: Gregory Fong Signed-off-by: Russell King --- arch/arm/include/asm/sparsemem.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/sparsemem.h b/arch/arm/include/asm/sparsemem.h index 00098615c6f0..73e5e8513751 100644 --- a/arch/arm/include/asm/sparsemem.h +++ b/arch/arm/include/asm/sparsemem.h @@ -15,10 +15,11 @@ * Eg, if you have 2 banks of up to 64MB at 0x80000000, 0x84000000, * then MAX_PHYSMEM_BITS is 32, SECTION_SIZE_BITS is 26. * - * Define these in your mach/memory.h. + * These can be overridden in your mach/memory.h. */ -#if !defined(SECTION_SIZE_BITS) || !defined(MAX_PHYSMEM_BITS) -#error Sparsemem is not supported on this platform +#if !defined(MAX_PHYSMEM_BITS) || !defined(SECTION_SIZE_BITS) +#define MAX_PHYSMEM_BITS 36 +#define SECTION_SIZE_BITS 28 #endif #endif -- cgit v1.2.3 From d78114554939aec0344b494e759d0679224562db Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 2 Feb 2016 00:14:53 +0100 Subject: ARM: 8512/1: proc-v7.S: Adjust stack address when XIP_KERNEL When XIP_KERNEL is enabled, the virt to phys address translation for RAM is not the same as the virt to phys address translation for .text. The only way to know where physical RAM is located is to use PLAT_PHYS_OFFSET. The MACRO will be useful for other places where there is a similar problem. Signed-off-by: Nicolas Pitre Signed-off-by: Chris Brandt Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 8 ++++++++ arch/arm/mm/proc-v7.S | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 49bf6b1e2177..ebdaaf7dd19f 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -134,6 +134,14 @@ */ #define PLAT_PHYS_OFFSET UL(CONFIG_PHYS_OFFSET) +#ifdef CONFIG_XIP_KERNEL +#define PHYS_OFFSET_FIXUP \ + ( XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR) - PAGE_OFFSET + \ + PLAT_PHYS_OFFSET - CONFIG_XIP_PHYS_ADDR ) +#else +#define PHYS_OFFSET_FIXUP 0 +#endif + #ifndef __ASSEMBLY__ /* diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 0f92d575a304..1595fb29ec12 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -487,7 +487,7 @@ __errata_finish: .align 2 __v7_setup_stack_ptr: - .word __v7_setup_stack - . + .word __v7_setup_stack - . + PHYS_OFFSET_FIXUP ENDPROC(__v7_setup) .bss -- cgit v1.2.3 From 8ff97fa31333e8d0f4f7029798d9c7d59359b05c Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 16 Feb 2016 17:33:56 +0000 Subject: ARM: make the physical-relative calculation more obvious The physical-relative calculation between the XIP text and data sections introduced by the previous patch was far from obvious. Let's simplify it by turning it into a macro which takes the two (virtual) addresses. This allows us to arrange the calculation in a more obvious manner - we can make it two sub-expressions which calculate the physical address for each symbol, and then takes the difference of those physical addresses. Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 15 +++++++++++---- arch/arm/mm/proc-v7.S | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index ebdaaf7dd19f..51fc9b3b52ea 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -135,11 +135,18 @@ #define PLAT_PHYS_OFFSET UL(CONFIG_PHYS_OFFSET) #ifdef CONFIG_XIP_KERNEL -#define PHYS_OFFSET_FIXUP \ - ( XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR) - PAGE_OFFSET + \ - PLAT_PHYS_OFFSET - CONFIG_XIP_PHYS_ADDR ) +/* + * When referencing data in RAM from the XIP region in a relative manner + * with the MMU off, we need the relative offset between the two physical + * addresses. The macro below achieves this, which is: + * __pa(v_data) - __xip_pa(v_text) + */ +#define PHYS_RELATIVE(v_data, v_text) \ + (((v_data) - PAGE_OFFSET + PLAT_PHYS_OFFSET) - \ + ((v_text) - XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR) + \ + CONFIG_XIP_PHYS_ADDR)) #else -#define PHYS_OFFSET_FIXUP 0 +#define PHYS_RELATIVE(v_data, v_text) ((v_data) - (v_text)) #endif #ifndef __ASSEMBLY__ diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 1595fb29ec12..0f8963a7e7d9 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -487,7 +487,7 @@ __errata_finish: .align 2 __v7_setup_stack_ptr: - .word __v7_setup_stack - . + PHYS_OFFSET_FIXUP + .word PHYS_RELATIVE(__v7_setup_stack, .) ENDPROC(__v7_setup) .bss -- cgit v1.2.3 From c7edd7f99cdaec65e9303b4c9edc19bb595564ed Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 19 Feb 2016 11:04:45 +0100 Subject: ARM: 8534/1: virt: fix hyp-stub build for pre-ARMv7 CPUs ARMv6 CPUs do not have virtualisation extensions, but hyp-stub.S is still included into the image to keep it generic. In order to use ARMv7 instructions during HYP initialisation, add -march=armv7-a flag to hyp-stub's build. On an ARMv6 CPU, __hyp_stub_install returns as soon as it detects that the mode isn't HYP, so we will never reach those instructions. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Russell King --- arch/arm/boot/compressed/Makefile | 2 ++ arch/arm/kernel/Makefile | 1 + 2 files changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 7a6a58ef8aaf..43788b1a1ac5 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -195,5 +195,7 @@ CFLAGS_font.o := -Dstatic= $(obj)/font.c: $(FONTC) $(call cmd,shipped) +AFLAGS_hyp-stub.o := -Wa,-march=armv7-a + $(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S $(call cmd,shipped) diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 2c5f160be65e..ad325a8c7e1e 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -88,6 +88,7 @@ obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o +AFLAGS_hyp-stub.o :=-Wa,-march=armv7-a ifeq ($(CONFIG_ARM_PSCI),y) obj-$(CONFIG_SMP) += psci_smp.o endif -- cgit v1.2.3 From 8d9f49136770e8a49c4efd5a4a5b23f75fefaaea Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 19 Feb 2016 11:04:45 +0100 Subject: ARM: 8534/1: virt: fix hyp-stub build for pre-ARMv7 CPUs ARMv6 CPUs do not have virtualisation extensions, but hyp-stub.S is still included into the image to keep it generic. In order to use ARMv7 instructions during HYP initialisation, add -march=armv7-a flag to hyp-stub's build. On an ARMv6 CPU, __hyp_stub_install returns as soon as it detects that the mode isn't HYP, so we will never reach those instructions. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Russell King --- arch/arm/boot/compressed/Makefile | 2 ++ arch/arm/kernel/Makefile | 1 + 2 files changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 4c23a68a0917..af8ff8aeca45 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -186,5 +186,7 @@ CFLAGS_font.o := -Dstatic= $(obj)/font.c: $(FONTC) $(call cmd,shipped) +AFLAGS_hyp-stub.o := -Wa,-march=armv7-a + $(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S $(call cmd,shipped) diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 2c5f160be65e..ad325a8c7e1e 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -88,6 +88,7 @@ obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o +AFLAGS_hyp-stub.o :=-Wa,-march=armv7-a ifeq ($(CONFIG_ARM_PSCI),y) obj-$(CONFIG_SMP) += psci_smp.o endif -- cgit v1.2.3 From ac96680d22ed86168bcccb4aae768bddb869481d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 19 Feb 2016 16:41:54 +0100 Subject: ARM: 8535/1: mm: DEBUG_RODATA makes no sense with XIP_KERNEL When CONFIG_DEBUG_ALIGN_RODATA is set, we get a link error: arch/arm/mm/built-in.o:(.data+0x4bc): undefined reference to `__start_rodata_section_aligned' However, this combination is useless, as XIP_KERNEL implies that all the RODATA is already marked readonly, so both CONFIG_DEBUG_RODATA and CONFIG_DEBUG_ALIGN_RODATA (which depends on the other) are not needed with XIP_KERNEL, and this patches enforces that using a Kconfig dependency. Signed-off-by: Arnd Bergmann Fixes: 25362dc496ed ("ARM: 8501/1: mm: flip priority of CONFIG_DEBUG_RODATA") Acked-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/mm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 4daeda0a5b7f..55347662e5ed 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -1039,7 +1039,7 @@ config ARCH_SUPPORTS_BIG_ENDIAN config DEBUG_RODATA bool "Make kernel text and rodata read-only" - depends on MMU + depends on MMU && !XIP_KERNEL default y if CPU_V7 help If this is set, kernel text and rodata memory will be made -- cgit v1.2.3 From 91c617d7a3af3b599ef97c48253a3f5ebb6cdadd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 19 Feb 2016 16:41:55 +0100 Subject: ARM: 8536/1: mm: hide __start_rodata_section_aligned for non-debug builds The __start_rodata_section_aligned is only referenced by the DEBUG_RODATA code, which is only used when the MMU is enabled, but the definition fails on !MMU builds: arch/arm/kernel/vmlinux.lds:702: undefined symbol `SECTION_SHIFT' referenced in expression This hides the symbol whenever DEBUG_RODATA is disabled. Signed-off-by: Arnd Bergmann Fixes: 64ac2e74f0b2 ("ARM: 8502/1: mm: mark section-aligned portion of rodata NX") Acked-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/kernel/vmlinux.lds.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index fdca231e150a..96de8924e518 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -317,12 +317,14 @@ SECTIONS STABS_DEBUG } +#ifdef CONFIG_DEBUG_RODATA /* * Without CONFIG_DEBUG_ALIGN_RODATA, __start_rodata_section_aligned will * be the first section-aligned location after __start_rodata. Otherwise, * it will be equal to __start_rodata. */ __start_rodata_section_aligned = ALIGN(__start_rodata, 1 << SECTION_SHIFT); +#endif /* * These must never be empty -- cgit v1.2.3 From 1475399207548333311e374446a113d4733b3878 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 19 Feb 2016 16:41:56 +0100 Subject: ARM: 8537/1: drop unused DEBUG_RODATA from XIP_KERNEL With CONFIG_DEBUG_RODATA not being sensible under XIP_KERNEL, remove it from the XIP linker script. Signed-off-by: Kees Cook Signed-off-by: Arnd Bergmann Signed-off-by: Russell King --- arch/arm/kernel/vmlinux-xip.lds.S | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 40bc4cadb959..cba1ec899a69 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -8,9 +8,6 @@ #include #include #include -#ifdef CONFIG_DEBUG_RODATA -#include -#endif #define PROC_INFO \ . = ALIGN(4); \ @@ -92,10 +89,6 @@ SECTIONS HEAD_TEXT } -#ifdef CONFIG_DEBUG_RODATA - . = ALIGN(1< Date: Thu, 18 Feb 2016 15:55:59 +0100 Subject: ARM: 8530/1: remove VIRT_TO_BUS All drivers that are relevant for rpc or footbridge have stopped using virt_to_bus a while ago, so we can remove it and avoid some harmless randconfig warnings for drivers that we do not care about: drivers/atm/zatm.c: In function 'poll_rx': drivers/atm/zatm.c:401:18: warning: 'bus_to_virt' is deprecated [-Wdeprecated-declarations] skb = ((struct rx_buffer_head *) bus_to_virt(here[2]))->skb; FWIW, the remaining drivers using this are: ATM: firestream, zatm, ambassador, horizon ISDN: hisax/netjet V4L: STA2X11, zoran Net: Appletalk LTPC, Tulip DE4x5, Toshiba IrDA WAN: comtrol sv11, cosa, lanmedia, sealevel SCSI: DPT_I2O, buslogic VME: CA91C142 My best guess is that all of the above are so hopelessly obsolete that we are best off removing all of them form the kernel, but that can be done another time. Signed-off-by: Arnd Bergmann Signed-off-by: Russell King --- arch/arm/Kconfig | 1 - arch/arm/include/asm/memory.h | 14 -------------- arch/arm/mach-footbridge/Kconfig | 1 - 3 files changed, 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index cc95ff8f07cb..d67f1aa93213 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -572,7 +572,6 @@ config ARCH_RPC select NEED_MACH_IO_H select NEED_MACH_MEMORY_H select NO_IOPORT_MAP - select VIRT_TO_BUS help On the Acorn Risc-PC, Linux can support the internal IDE disk and CD-ROM interface, serial and parallel port, and the floppy drive. diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 51fc9b3b52ea..9427fd632552 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -318,20 +318,6 @@ static inline unsigned long __virt_to_idmap(unsigned long x) #define __bus_to_pfn(x) __phys_to_pfn(x) #endif -#ifdef CONFIG_VIRT_TO_BUS -#define virt_to_bus virt_to_bus -static inline __deprecated unsigned long virt_to_bus(void *x) -{ - return __virt_to_bus((unsigned long)x); -} - -#define bus_to_virt bus_to_virt -static inline __deprecated void *bus_to_virt(unsigned long x) -{ - return (void *)__bus_to_virt(x); -} -#endif - /* * Conversion between a struct page and a physical address. * diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig index 07152d00fc50..cbbdd84cf49a 100644 --- a/arch/arm/mach-footbridge/Kconfig +++ b/arch/arm/mach-footbridge/Kconfig @@ -68,7 +68,6 @@ config ARCH_NETWINDER select ISA select ISA_DMA select PCI - select VIRT_TO_BUS help Say Y here if you intend to run this kernel on the Rebel.COM NetWinder. Information about this machine can be found at: -- cgit v1.2.3 From 7d74a5f07694050d62fcee9120d20d702d7e6333 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Feb 2016 16:00:23 +0100 Subject: ARM: 8531/1: turn init_new_context into an inline function Almost all architectures define init_new_context() as a function, but on ARM, it's a macro and that causes a compiler warning when its return code is not used: drivers/firmware/efi/arm-runtime.c: In function 'efi_virtmap_init': arch/arm/include/asm/mmu_context.h:88:34: warning: statement with no effect [-Wunused-value] #define init_new_context(tsk,mm) 0 drivers/firmware/efi/arm-runtime.c:47:2: note: in expansion of macro 'init_new_context' init_new_context(NULL, &efi_mm); This changes the definition into an inline function, which gcc does not warn about. Signed-off-by: Arnd Bergmann Signed-off-by: Russell King --- arch/arm/include/asm/mmu_context.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index 432ce8176498..fa5b42d44985 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -26,7 +26,12 @@ void __check_vmalloc_seq(struct mm_struct *mm); #ifdef CONFIG_CPU_HAS_ASID void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); -#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) +static inline int +init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + atomic64_set(&mm->context.id, 0); + return 0; +} #ifdef CONFIG_ARM_ERRATA_798181 void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, @@ -85,7 +90,12 @@ static inline void finish_arch_post_lock_switch(void) #endif /* CONFIG_MMU */ -#define init_new_context(tsk,mm) 0 +static inline int +init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + return 0; +} + #endif /* CONFIG_CPU_HAS_ASID */ -- cgit v1.2.3 From 4d2b7d4cb4e0786d6435b1df95132033a5b0077a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Feb 2016 17:36:23 +0100 Subject: ARM: 8532/1: uncompress: mark putc as inline When CONFIG_DEBUG_ICEDCC is set, we don't use the platform specific putc() function, but use icedcc_putc() instead, so putc is unused and causes a compile time warning: In file included from ../arch/arm/boot/compressed/misc.c:28:0: arch/arm/mach-rpc/include/mach/uncompress.h:79:13: warning: 'putc' defined but not used [-Wunused-function] arch/arm/mach-w90x900/include/mach/uncompress.h:30:13: warning: 'putc' defined but not used [-Wunused-function] On most platforms, this does not happen, because putc is defined as 'static inline' so the compiler will automatically drop it when it's unused. This changes the remaining seven platforms to behave the same way. Signed-off-by: Arnd Bergmann Signed-off-by: Russell King --- arch/arm/mach-davinci/include/mach/uncompress.h | 2 +- arch/arm/mach-ks8695/include/mach/uncompress.h | 2 +- arch/arm/mach-netx/include/mach/uncompress.h | 2 +- arch/arm/mach-omap1/include/mach/uncompress.h | 2 +- arch/arm/mach-rpc/include/mach/uncompress.h | 2 +- arch/arm/mach-sa1100/include/mach/uncompress.h | 2 +- arch/arm/mach-w90x900/include/mach/uncompress.h | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-davinci/include/mach/uncompress.h b/arch/arm/mach-davinci/include/mach/uncompress.h index 8fb97b93b6bb..53b456a5bbe0 100644 --- a/arch/arm/mach-davinci/include/mach/uncompress.h +++ b/arch/arm/mach-davinci/include/mach/uncompress.h @@ -30,7 +30,7 @@ u32 *uart; /* PORT_16C550A, in polled non-fifo mode */ -static void putc(char c) +static inline void putc(char c) { if (!uart) return; diff --git a/arch/arm/mach-ks8695/include/mach/uncompress.h b/arch/arm/mach-ks8695/include/mach/uncompress.h index c089a1aea674..a001c7c34df2 100644 --- a/arch/arm/mach-ks8695/include/mach/uncompress.h +++ b/arch/arm/mach-ks8695/include/mach/uncompress.h @@ -17,7 +17,7 @@ #include #include -static void putc(char c) +static inline void putc(char c) { while (!(__raw_readl((void __iomem*)KS8695_UART_PA + KS8695_URLS) & URLS_URTHRE)) barrier(); diff --git a/arch/arm/mach-netx/include/mach/uncompress.h b/arch/arm/mach-netx/include/mach/uncompress.h index 5cb1051b5831..033875dbc32b 100644 --- a/arch/arm/mach-netx/include/mach/uncompress.h +++ b/arch/arm/mach-netx/include/mach/uncompress.h @@ -40,7 +40,7 @@ #define FR_BUSY (1<<3) #define FR_TXFF (1<<5) -static void putc(char c) +static inline void putc(char c) { unsigned long base; diff --git a/arch/arm/mach-omap1/include/mach/uncompress.h b/arch/arm/mach-omap1/include/mach/uncompress.h index 4869633de8cd..9cca6a56788f 100644 --- a/arch/arm/mach-omap1/include/mach/uncompress.h +++ b/arch/arm/mach-omap1/include/mach/uncompress.h @@ -45,7 +45,7 @@ static void set_omap_uart_info(unsigned char port) *uart_info = port; } -static void putc(int c) +static inline void putc(int c) { if (!uart_base) return; diff --git a/arch/arm/mach-rpc/include/mach/uncompress.h b/arch/arm/mach-rpc/include/mach/uncompress.h index 0fd4b0b8ef22..654a6f3f2547 100644 --- a/arch/arm/mach-rpc/include/mach/uncompress.h +++ b/arch/arm/mach-rpc/include/mach/uncompress.h @@ -76,7 +76,7 @@ int white; /* * This does not append a newline */ -static void putc(int c) +static inline void putc(int c) { extern void ll_write_char(char *, char c, char white); int x,y; diff --git a/arch/arm/mach-sa1100/include/mach/uncompress.h b/arch/arm/mach-sa1100/include/mach/uncompress.h index 73093dc89829..a1a041b9740b 100644 --- a/arch/arm/mach-sa1100/include/mach/uncompress.h +++ b/arch/arm/mach-sa1100/include/mach/uncompress.h @@ -19,7 +19,7 @@ #define UART(x) (*(volatile unsigned long *)(serial_port + (x))) -static void putc(int c) +static inline void putc(int c) { unsigned long serial_port; diff --git a/arch/arm/mach-w90x900/include/mach/uncompress.h b/arch/arm/mach-w90x900/include/mach/uncompress.h index 4b7c324ff664..3855ecebda6e 100644 --- a/arch/arm/mach-w90x900/include/mach/uncompress.h +++ b/arch/arm/mach-w90x900/include/mach/uncompress.h @@ -27,7 +27,7 @@ #define TX_DONE (UART_LSR_TEMT | UART_LSR_THRE) static volatile u32 * const uart_base = (u32 *)UART0_PA; -static void putc(int ch) +static inline void putc(int ch) { /* Check THRE and TEMT bits before we transmit the character. */ -- cgit v1.2.3 From b44c72de44173f47e782610e31d4cd1abca3741f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 22 Feb 2016 13:29:35 +0100 Subject: ARM: 8538/1: decompressor: drop unneeded assignments to "targets" The "targets" exists to specify which files need the corresponding ".*_cmd" files to be included during the build. In other words, it is used for files that need to detect the change of the command line by if_changed, if_changed_dep, and if_changed_rule. While, these files are just copied by "$(call cmd,shipped)". Adding them to the "targets" is meaningless because $(call cmd,...) never creates ".*_cmd" files. Such files as ".lib1funcs.S.cmd", ".ashldi3.S.cmd" do not exist in the first place. Signed-off-by: Masahiro Yamada Signed-off-by: Russell King --- arch/arm/boot/compressed/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index af8ff8aeca45..85066e8082c3 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -91,8 +91,8 @@ endif targets := vmlinux vmlinux.lds \ piggy.$(suffix_y) piggy.$(suffix_y).o \ - lib1funcs.o lib1funcs.S ashldi3.o ashldi3.S bswapsdi2.o \ - bswapsdi2.S font.o font.c head.o misc.o $(OBJS) + lib1funcs.o ashldi3.o bswapsdi2.o \ + font.o head.o misc.o $(OBJS) # Make sure files are removed during clean extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \ -- cgit v1.2.3 From 684c12014b55f67a57487d2a19f10cb0000e8bd4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 22 Feb 2016 13:30:37 +0100 Subject: ARM: 8539/1: decompressor: drop more unneeded assignments to "targets" The objects "font.o" and "misc.o" are contained in $(OBJS), and it is already added to the "targets". Signed-off-by: Masahiro Yamada Signed-off-by: Russell King --- arch/arm/boot/compressed/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 85066e8082c3..48c9db595ad2 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -92,7 +92,7 @@ endif targets := vmlinux vmlinux.lds \ piggy.$(suffix_y) piggy.$(suffix_y).o \ lib1funcs.o ashldi3.o bswapsdi2.o \ - font.o head.o misc.o $(OBJS) + head.o $(OBJS) # Make sure files are removed during clean extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \ -- cgit v1.2.3 From 822ec1b6ead2667c455227a0fc95c07c0515aed2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 22 Feb 2016 13:31:33 +0100 Subject: ARM: 8540/1: decompressor: use clean-files instead of extra-y to clean files This code works fine here, but it is tricky to use "extra-y" for specifying files to be removed during "make clean". Kbuild provides "clean-files" for this purpose. Signed-off-by: Masahiro Yamada Signed-off-by: Russell King --- arch/arm/boot/compressed/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 48c9db595ad2..1b3d3e5bbd68 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -94,8 +94,7 @@ targets := vmlinux vmlinux.lds \ lib1funcs.o ashldi3.o bswapsdi2.o \ head.o $(OBJS) -# Make sure files are removed during clean -extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \ +clean-files += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \ lib1funcs.S ashldi3.S bswapsdi2.S $(libfdt) $(libfdt_hdrs) \ hyp-stub.S -- cgit v1.2.3 From b0b6abe5fd28423558d8e121e77e92e779f62de1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 22 Feb 2016 13:32:05 +0100 Subject: ARM: 8541/1: decompressor: drop redundant FORCE in Makefile The object "piggy.$(suffix_y).o" is created from "piggy.$(suffix).S" by the following pattern rule defined in scripts/Makefile.build: $(obj)/%.o: $(src)/%.S FORCE $(call if_changed_dep,as_o_S) FORCE is already added to the prerequisite of the object there. Signed-off-by: Masahiro Yamada Signed-off-by: Russell King --- arch/arm/boot/compressed/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 1b3d3e5bbd68..13190444b3f0 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -178,7 +178,7 @@ $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \ $(obj)/piggy.$(suffix_y): $(obj)/../Image FORCE $(call if_changed,$(suffix_y)) -$(obj)/piggy.$(suffix_y).o: $(obj)/piggy.$(suffix_y) FORCE +$(obj)/piggy.$(suffix_y).o: $(obj)/piggy.$(suffix_y) CFLAGS_font.o := -Dstatic= -- cgit v1.2.3 From 53f67545c21c756d7959fb86af30899c4b43141c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 22 Feb 2016 13:32:24 +0100 Subject: ARM: 8542/1: decompressor: merge piggy.*.S and simplify Makefile The files piggy.$(suffix).S are similar enough to be merged into a single file. This also allows clean up of the Makefile. Signed-off-by: Masahiro Yamada Signed-off-by: Russell King --- arch/arm/boot/compressed/.gitignore | 6 +----- arch/arm/boot/compressed/Makefile | 14 ++++++-------- arch/arm/boot/compressed/piggy.S | 6 ++++++ arch/arm/boot/compressed/piggy.gzip.S | 6 ------ arch/arm/boot/compressed/piggy.lz4.S | 6 ------ arch/arm/boot/compressed/piggy.lzma.S | 6 ------ arch/arm/boot/compressed/piggy.lzo.S | 6 ------ arch/arm/boot/compressed/piggy.xzkern.S | 6 ------ 8 files changed, 13 insertions(+), 43 deletions(-) create mode 100644 arch/arm/boot/compressed/piggy.S delete mode 100644 arch/arm/boot/compressed/piggy.gzip.S delete mode 100644 arch/arm/boot/compressed/piggy.lz4.S delete mode 100644 arch/arm/boot/compressed/piggy.lzma.S delete mode 100644 arch/arm/boot/compressed/piggy.lzo.S delete mode 100644 arch/arm/boot/compressed/piggy.xzkern.S (limited to 'arch') diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore index 0714e0334e33..86b2f5d28240 100644 --- a/arch/arm/boot/compressed/.gitignore +++ b/arch/arm/boot/compressed/.gitignore @@ -3,11 +3,7 @@ bswapsdi2.S font.c lib1funcs.S hyp-stub.S -piggy.gzip -piggy.lzo -piggy.lzma -piggy.xzkern -piggy.lz4 +piggy_data vmlinux vmlinux.lds diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 13190444b3f0..c67cd889a233 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -89,14 +89,12 @@ ifeq ($(CONFIG_ARM_ATAG_DTB_COMPAT),y) OBJS += $(libfdt_objs) atags_to_fdt.o endif -targets := vmlinux vmlinux.lds \ - piggy.$(suffix_y) piggy.$(suffix_y).o \ +targets := vmlinux vmlinux.lds piggy_data piggy.o \ lib1funcs.o ashldi3.o bswapsdi2.o \ head.o $(OBJS) -clean-files += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \ - lib1funcs.S ashldi3.S bswapsdi2.S $(libfdt) $(libfdt_hdrs) \ - hyp-stub.S +clean-files += piggy_data lib1funcs.S ashldi3.S bswapsdi2.S \ + $(libfdt) $(libfdt_hdrs) hyp-stub.S KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING @@ -168,17 +166,17 @@ fi efi-obj-$(CONFIG_EFI_STUB) := $(objtree)/drivers/firmware/efi/libstub/lib.a -$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \ +$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.o \ $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) $(ashldi3) \ $(bswapsdi2) $(efi-obj-y) FORCE @$(check_for_multiple_zreladdr) $(call if_changed,ld) @$(check_for_bad_syms) -$(obj)/piggy.$(suffix_y): $(obj)/../Image FORCE +$(obj)/piggy_data: $(obj)/../Image FORCE $(call if_changed,$(suffix_y)) -$(obj)/piggy.$(suffix_y).o: $(obj)/piggy.$(suffix_y) +$(obj)/piggy.o: $(obj)/piggy_data CFLAGS_font.o := -Dstatic= diff --git a/arch/arm/boot/compressed/piggy.S b/arch/arm/boot/compressed/piggy.S new file mode 100644 index 000000000000..f72088495f43 --- /dev/null +++ b/arch/arm/boot/compressed/piggy.S @@ -0,0 +1,6 @@ + .section .piggydata,#alloc + .globl input_data +input_data: + .incbin "arch/arm/boot/compressed/piggy_data" + .globl input_data_end +input_data_end: diff --git a/arch/arm/boot/compressed/piggy.gzip.S b/arch/arm/boot/compressed/piggy.gzip.S deleted file mode 100644 index a68adf91a165..000000000000 --- a/arch/arm/boot/compressed/piggy.gzip.S +++ /dev/null @@ -1,6 +0,0 @@ - .section .piggydata,#alloc - .globl input_data -input_data: - .incbin "arch/arm/boot/compressed/piggy.gzip" - .globl input_data_end -input_data_end: diff --git a/arch/arm/boot/compressed/piggy.lz4.S b/arch/arm/boot/compressed/piggy.lz4.S deleted file mode 100644 index 3d9a575618a3..000000000000 --- a/arch/arm/boot/compressed/piggy.lz4.S +++ /dev/null @@ -1,6 +0,0 @@ - .section .piggydata,#alloc - .globl input_data -input_data: - .incbin "arch/arm/boot/compressed/piggy.lz4" - .globl input_data_end -input_data_end: diff --git a/arch/arm/boot/compressed/piggy.lzma.S b/arch/arm/boot/compressed/piggy.lzma.S deleted file mode 100644 index d7e69cffbc0a..000000000000 --- a/arch/arm/boot/compressed/piggy.lzma.S +++ /dev/null @@ -1,6 +0,0 @@ - .section .piggydata,#alloc - .globl input_data -input_data: - .incbin "arch/arm/boot/compressed/piggy.lzma" - .globl input_data_end -input_data_end: diff --git a/arch/arm/boot/compressed/piggy.lzo.S b/arch/arm/boot/compressed/piggy.lzo.S deleted file mode 100644 index a425ad95959a..000000000000 --- a/arch/arm/boot/compressed/piggy.lzo.S +++ /dev/null @@ -1,6 +0,0 @@ - .section .piggydata,#alloc - .globl input_data -input_data: - .incbin "arch/arm/boot/compressed/piggy.lzo" - .globl input_data_end -input_data_end: diff --git a/arch/arm/boot/compressed/piggy.xzkern.S b/arch/arm/boot/compressed/piggy.xzkern.S deleted file mode 100644 index 5703f300d027..000000000000 --- a/arch/arm/boot/compressed/piggy.xzkern.S +++ /dev/null @@ -1,6 +0,0 @@ - .section .piggydata,#alloc - .globl input_data -input_data: - .incbin "arch/arm/boot/compressed/piggy.xzkern" - .globl input_data_end -input_data_end: -- cgit v1.2.3 From f57deb0be5c25fcf2d30dce113f0a6a985c227bb Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 22 Feb 2016 13:32:58 +0100 Subject: ARM: 8543/1: decompressor: rename suffix_y to compress-y The "$(suffix_y)" no longer appears in the file names, but it just specifies the method of the file compression. The "compress-y" sounds more suitable. Signed-off-by: Masahiro Yamada Signed-off-by: Russell King --- arch/arm/boot/compressed/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index c67cd889a233..9541277b7952 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -66,11 +66,11 @@ endif CPPFLAGS_vmlinux.lds := -DTEXT_START="$(ZTEXTADDR)" -DBSS_START="$(ZBSSADDR)" -suffix_$(CONFIG_KERNEL_GZIP) = gzip -suffix_$(CONFIG_KERNEL_LZO) = lzo -suffix_$(CONFIG_KERNEL_LZMA) = lzma -suffix_$(CONFIG_KERNEL_XZ) = xzkern -suffix_$(CONFIG_KERNEL_LZ4) = lz4 +compress-$(CONFIG_KERNEL_GZIP) = gzip +compress-$(CONFIG_KERNEL_LZO) = lzo +compress-$(CONFIG_KERNEL_LZMA) = lzma +compress-$(CONFIG_KERNEL_XZ) = xzkern +compress-$(CONFIG_KERNEL_LZ4) = lz4 # Borrowed libfdt files for the ATAG compatibility mode @@ -174,7 +174,7 @@ $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.o \ @$(check_for_bad_syms) $(obj)/piggy_data: $(obj)/../Image FORCE - $(call if_changed,$(suffix_y)) + $(call if_changed,$(compress-y)) $(obj)/piggy.o: $(obj)/piggy_data -- cgit v1.2.3 From f474c8c857d996f34c39f66bbed23faaa739fad6 Mon Sep 17 00:00:00 2001 From: Mika Penttilä Date: Mon, 22 Feb 2016 17:56:52 +0100 Subject: ARM: 8544/1: set_memory_xx fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow zero size updates. This makes set_memory_xx() consistent with x86, s390 and arm64 and makes apply_to_page_range() not to BUG() when loading modules. Signed-off-by: Mika Penttilä mika.penttila@nextfour.com Signed-off-by: Russell King --- arch/arm/mm/pageattr.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c index cf30daff8932..d19b1ad29b07 100644 --- a/arch/arm/mm/pageattr.c +++ b/arch/arm/mm/pageattr.c @@ -49,6 +49,9 @@ static int change_memory_common(unsigned long addr, int numpages, WARN_ON_ONCE(1); } + if (!numpages) + return 0; + if (start < MODULES_VADDR || start >= MODULES_END) return -EINVAL; -- cgit v1.2.3 From 19e6e5e5392bd646c93d9e2c7b2b58c8558cb041 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 3 Mar 2016 15:58:00 +0100 Subject: ARM: 8547/1: dma-mapping: store buffer information Keep a list of allocated DMA buffers so that we can store metadata in alloc() which we later need in free(). Signed-off-by: Rabin Vincent Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 9f996a3d79f7..696f6ee259ee 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -42,6 +42,31 @@ #include "dma.h" #include "mm.h" +struct arm_dma_buffer { + struct list_head list; + void *virt; +}; + +static LIST_HEAD(arm_dma_bufs); +static DEFINE_SPINLOCK(arm_dma_bufs_lock); + +static struct arm_dma_buffer *arm_dma_buffer_find(void *virt) +{ + struct arm_dma_buffer *buf, *found = NULL; + unsigned long flags; + + spin_lock_irqsave(&arm_dma_bufs_lock, flags); + list_for_each_entry(buf, &arm_dma_bufs, list) { + if (buf->virt == virt) { + list_del(&buf->list); + found = buf; + break; + } + } + spin_unlock_irqrestore(&arm_dma_bufs_lock, flags); + return found; +} + /* * The DMA API is built upon the notion of "buffer ownership". A buffer * is either exclusively owned by the CPU (and therefore may be accessed @@ -620,6 +645,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, struct page *page = NULL; void *addr; bool want_vaddr; + struct arm_dma_buffer *buf; #ifdef CONFIG_DMA_API_DEBUG u64 limit = (mask + 1) & ~mask; @@ -633,6 +659,10 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, if (!mask) return NULL; + buf = kzalloc(sizeof(*buf), gfp); + if (!buf) + return NULL; + if (mask < 0xffffffffULL) gfp |= GFP_DMA; @@ -662,8 +692,18 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller, want_vaddr); - if (page) + if (page) { + unsigned long flags; + *handle = pfn_to_dma(dev, page_to_pfn(page)); + buf->virt = want_vaddr ? addr : page; + + spin_lock_irqsave(&arm_dma_bufs_lock, flags); + list_add(&buf->list, &arm_dma_bufs); + spin_unlock_irqrestore(&arm_dma_bufs_lock, flags); + } else { + kfree(buf); + } return want_vaddr ? addr : page; } @@ -742,6 +782,11 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, { struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); bool want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs); + struct arm_dma_buffer *buf; + + buf = arm_dma_buffer_find(cpu_addr); + if (WARN(!buf, "Freeing invalid buffer %p\n", cpu_addr)) + return; size = PAGE_ALIGN(size); @@ -760,6 +805,8 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, WARN_ON(irqs_disabled()); __free_from_contiguous(dev, page, cpu_addr, size, want_vaddr); } + + kfree(buf); } void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, -- cgit v1.2.3 From b42686761219036ace45192476d8ab64d86a4ece Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 3 Mar 2016 15:58:01 +0100 Subject: ARM: 8546/1: dma-mapping: refactor to fix coherent+cma+gfp=0 Given a device which uses arm_coherent_dma_ops and on which dev_get_cma_area(dev) returns non-NULL, the following usage of the DMA API with gfp=0 results in memory corruption and a memory leak. p = dma_alloc_coherent(dev, sz, &dma, 0); if (p) dma_free_coherent(dev, sz, p, dma); The memory leak is because the alloc allocates using __alloc_simple_buffer() but the free attempts dma_release_from_contiguous() which does not do free anything since the page is not in the CMA area. The memory corruption is because the free calls __dma_remap() on a page which is backed by only first level page tables. The apply_to_page_range() + __dma_update_pte() loop ends up interpreting the section mapping as an addresses to a second level page table and writing the new PTE to memory which is not used by page tables. We don't have access to the GFP flags used for allocation in the free function. Fix this by adding allocator backends and using this information in the free function so that we always use the correct release routine. Fixes: 21caf3a7 ("ARM: 8398/1: arm DMA: Fix allocation from CMA for coherent DMA") Signed-off-by: Rabin Vincent Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 165 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 128 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 696f6ee259ee..deac58d5f1f7 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -42,9 +42,33 @@ #include "dma.h" #include "mm.h" +struct arm_dma_alloc_args { + struct device *dev; + size_t size; + gfp_t gfp; + pgprot_t prot; + const void *caller; + bool want_vaddr; +}; + +struct arm_dma_free_args { + struct device *dev; + size_t size; + void *cpu_addr; + struct page *page; + bool want_vaddr; +}; + +struct arm_dma_allocator { + void *(*alloc)(struct arm_dma_alloc_args *args, + struct page **ret_page); + void (*free)(struct arm_dma_free_args *args); +}; + struct arm_dma_buffer { struct list_head list; void *virt; + struct arm_dma_allocator *allocator; }; static LIST_HEAD(arm_dma_bufs); @@ -617,7 +641,7 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv) NULL #define __alloc_from_pool(size, ret_page) NULL #define __alloc_from_contiguous(dev, size, prot, ret, c, wv) NULL -#define __free_from_pool(cpu_addr, size) 0 +#define __free_from_pool(cpu_addr, size) do { } while (0) #define __free_from_contiguous(dev, page, cpu_addr, size, wv) do { } while (0) #define __dma_free_remap(cpu_addr, size) do { } while (0) @@ -635,7 +659,78 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, return page_address(page); } +static void *simple_allocator_alloc(struct arm_dma_alloc_args *args, + struct page **ret_page) +{ + return __alloc_simple_buffer(args->dev, args->size, args->gfp, + ret_page); +} + +static void simple_allocator_free(struct arm_dma_free_args *args) +{ + __dma_free_buffer(args->page, args->size); +} + +static struct arm_dma_allocator simple_allocator = { + .alloc = simple_allocator_alloc, + .free = simple_allocator_free, +}; + +static void *cma_allocator_alloc(struct arm_dma_alloc_args *args, + struct page **ret_page) +{ + return __alloc_from_contiguous(args->dev, args->size, args->prot, + ret_page, args->caller, + args->want_vaddr); +} + +static void cma_allocator_free(struct arm_dma_free_args *args) +{ + __free_from_contiguous(args->dev, args->page, args->cpu_addr, + args->size, args->want_vaddr); +} + +static struct arm_dma_allocator cma_allocator = { + .alloc = cma_allocator_alloc, + .free = cma_allocator_free, +}; +static void *pool_allocator_alloc(struct arm_dma_alloc_args *args, + struct page **ret_page) +{ + return __alloc_from_pool(args->size, ret_page); +} + +static void pool_allocator_free(struct arm_dma_free_args *args) +{ + __free_from_pool(args->cpu_addr, args->size); +} + +static struct arm_dma_allocator pool_allocator = { + .alloc = pool_allocator_alloc, + .free = pool_allocator_free, +}; + +static void *remap_allocator_alloc(struct arm_dma_alloc_args *args, + struct page **ret_page) +{ + return __alloc_remap_buffer(args->dev, args->size, args->gfp, + args->prot, ret_page, args->caller, + args->want_vaddr); +} + +static void remap_allocator_free(struct arm_dma_free_args *args) +{ + if (args->want_vaddr) + __dma_free_remap(args->cpu_addr, args->size); + + __dma_free_buffer(args->page, args->size); +} + +static struct arm_dma_allocator remap_allocator = { + .alloc = remap_allocator_alloc, + .free = remap_allocator_free, +}; static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, pgprot_t prot, bool is_coherent, @@ -644,8 +739,16 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, u64 mask = get_coherent_dma_mask(dev); struct page *page = NULL; void *addr; - bool want_vaddr; + bool allowblock, cma; struct arm_dma_buffer *buf; + struct arm_dma_alloc_args args = { + .dev = dev, + .size = PAGE_ALIGN(size), + .gfp = gfp, + .prot = prot, + .caller = caller, + .want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs), + }; #ifdef CONFIG_DMA_API_DEBUG u64 limit = (mask + 1) & ~mask; @@ -674,29 +777,28 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, * platform; see CONFIG_HUGETLBFS. */ gfp &= ~(__GFP_COMP); + args.gfp = gfp; *handle = DMA_ERROR_CODE; - size = PAGE_ALIGN(size); - want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs); - - if (nommu()) - addr = __alloc_simple_buffer(dev, size, gfp, &page); - else if (dev_get_cma_area(dev) && (gfp & __GFP_DIRECT_RECLAIM)) - addr = __alloc_from_contiguous(dev, size, prot, &page, - caller, want_vaddr); - else if (is_coherent) - addr = __alloc_simple_buffer(dev, size, gfp, &page); - else if (!gfpflags_allow_blocking(gfp)) - addr = __alloc_from_pool(size, &page); + allowblock = gfpflags_allow_blocking(gfp); + cma = allowblock ? dev_get_cma_area(dev) : false; + + if (cma) + buf->allocator = &cma_allocator; + else if (nommu() || is_coherent) + buf->allocator = &simple_allocator; + else if (allowblock) + buf->allocator = &remap_allocator; else - addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, - caller, want_vaddr); + buf->allocator = &pool_allocator; + + addr = buf->allocator->alloc(&args, &page); if (page) { unsigned long flags; *handle = pfn_to_dma(dev, page_to_pfn(page)); - buf->virt = want_vaddr ? addr : page; + buf->virt = args.want_vaddr ? addr : page; spin_lock_irqsave(&arm_dma_bufs_lock, flags); list_add(&buf->list, &arm_dma_bufs); @@ -705,7 +807,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, kfree(buf); } - return want_vaddr ? addr : page; + return args.want_vaddr ? addr : page; } /* @@ -781,31 +883,20 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, bool is_coherent) { struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); - bool want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs); struct arm_dma_buffer *buf; + struct arm_dma_free_args args = { + .dev = dev, + .size = PAGE_ALIGN(size), + .cpu_addr = cpu_addr, + .page = page, + .want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs), + }; buf = arm_dma_buffer_find(cpu_addr); if (WARN(!buf, "Freeing invalid buffer %p\n", cpu_addr)) return; - size = PAGE_ALIGN(size); - - if (nommu()) { - __dma_free_buffer(page, size); - } else if (!is_coherent && __free_from_pool(cpu_addr, size)) { - return; - } else if (!dev_get_cma_area(dev)) { - if (want_vaddr && !is_coherent) - __dma_free_remap(cpu_addr, size); - __dma_free_buffer(page, size); - } else { - /* - * Non-atomic allocations cannot be freed with IRQs disabled - */ - WARN_ON(irqs_disabled()); - __free_from_contiguous(dev, page, cpu_addr, size, want_vaddr); - } - + buf->allocator->free(&args); kfree(buf); } -- cgit v1.2.3