From 7dbaa466780a754154531b44c2086f6618cee3a8 Mon Sep 17 00:00:00 2001
From: Rob Herring
Date: Tue, 22 Nov 2011 04:01:07 +0100
Subject: ARM: 7169/1: topdown mmap support

Similar to other architectures, this adds topdown mmap support to the
user process address space allocation policy, which allows mmap sizes
greater than 2GB. The support is largely copied from the MIPS and
generic implementations.

The address space randomization is moved into arch_pick_mmap_layout.

Tested on V-Express with Ubuntu and a mmap test from here:
https://bugs.launchpad.net/bugs/861296

Signed-off-by: Rob Herring
Acked-by: Nicolas Pitre
Signed-off-by: Russell King
---
 arch/arm/mm/mmap.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 168 insertions(+), 5 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 44b628e4d6ea..ce8cb1970d7a 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -11,10 +11,49 @@
 #include
 #include
 
+static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr,
+                                              unsigned long pgoff)
+{
+        unsigned long base = addr & ~(SHMLBA-1);
+        unsigned long off = (pgoff << PAGE_SHIFT) & (SHMLBA-1);
+
+        if (base + off <= addr)
+                return base + off;
+
+        return base - off;
+}
+
 #define COLOUR_ALIGN(addr,pgoff)                \
         ((((addr)+SHMLBA-1)&~(SHMLBA-1)) +      \
          (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1)))
 
+/* gap between mmap and stack */
+#define MIN_GAP (128*1024*1024UL)
+#define MAX_GAP ((TASK_SIZE)/6*5)
+
+static int mmap_is_legacy(void)
+{
+        if (current->personality & ADDR_COMPAT_LAYOUT)
+                return 1;
+
+        if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+                return 1;
+
+        return sysctl_legacy_va_layout;
+}
+
+static unsigned long mmap_base(unsigned long rnd)
+{
+        unsigned long gap = rlimit(RLIMIT_STACK);
+
+        if (gap < MIN_GAP)
+                gap = MIN_GAP;
+        else if (gap > MAX_GAP)
+                gap = MAX_GAP;
+
+        return PAGE_ALIGN(TASK_SIZE - gap - rnd);
+}
+
 /*
  * We need to ensure that shared mappings are correctly aligned to
  * avoid aliasing issues with VIPT caches. We need to ensure that
@@ -68,13 +107,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
         if (len > mm->cached_hole_size) {
                 start_addr = addr = mm->free_area_cache;
         } else {
-                start_addr = addr = TASK_UNMAPPED_BASE;
+                start_addr = addr = mm->mmap_base;
                 mm->cached_hole_size = 0;
         }
-        /* 8 bits of randomness in 20 address space bits */
-        if ((current->flags & PF_RANDOMIZE) &&
-            !(current->personality & ADDR_NO_RANDOMIZE))
-                addr += (get_random_int() % (1 << 8)) << PAGE_SHIFT;
 
 full_search:
         if (do_align)
@@ -111,6 +146,134 @@ full_search:
         }
 }
 
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+                        const unsigned long len, const unsigned long pgoff,
+                        const unsigned long flags)
+{
+        struct vm_area_struct *vma;
+        struct mm_struct *mm = current->mm;
+        unsigned long addr = addr0;
+        int do_align = 0;
+        int aliasing = cache_is_vipt_aliasing();
+
+        /*
+         * We only need to do colour alignment if either the I or D
+         * caches alias.
+         */
+        if (aliasing)
+                do_align = filp || (flags & MAP_SHARED);
+
+        /* requested length too big for entire address space */
+        if (len > TASK_SIZE)
+                return -ENOMEM;
+
+        if (flags & MAP_FIXED) {
+                if (aliasing && flags & MAP_SHARED &&
+                    (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))
+                        return -EINVAL;
+                return addr;
+        }
+
+        /* requesting a specific address */
+        if (addr) {
+                if (do_align)
+                        addr = COLOUR_ALIGN(addr, pgoff);
+                else
+                        addr = PAGE_ALIGN(addr);
+                vma = find_vma(mm, addr);
+                if (TASK_SIZE - len >= addr &&
+                    (!vma || addr + len <= vma->vm_start))
+                        return addr;
+        }
+
+        /* check if free_area_cache is useful for us */
+        if (len <= mm->cached_hole_size) {
+                mm->cached_hole_size = 0;
+                mm->free_area_cache = mm->mmap_base;
+        }
+
+        /* either no address requested or can't fit in requested address hole */
+        addr = mm->free_area_cache;
+        if (do_align) {
+                unsigned long base = COLOUR_ALIGN_DOWN(addr - len, pgoff);
+                addr = base + len;
+        }
+
+        /* make sure it can fit in the remaining address space */
+        if (addr > len) {
+                vma = find_vma(mm, addr-len);
+                if (!vma || addr <= vma->vm_start)
+                        /* remember the address as a hint for next time */
+                        return (mm->free_area_cache = addr-len);
+        }
+
+        if (mm->mmap_base < len)
+                goto bottomup;
+
+        addr = mm->mmap_base - len;
+        if (do_align)
+                addr = COLOUR_ALIGN_DOWN(addr, pgoff);
+
+        do {
+                /*
+                 * Lookup failure means no vma is above this address,
+                 * else if new region fits below vma->vm_start,
+                 * return with success:
+                 */
+                vma = find_vma(mm, addr);
+                if (!vma || addr+len <= vma->vm_start)
+                        /* remember the address as a hint for next time */
+                        return (mm->free_area_cache = addr);
+
+                /* remember the largest hole we saw so far */
+                if (addr + mm->cached_hole_size < vma->vm_start)
+                        mm->cached_hole_size = vma->vm_start - addr;
+
+                /* try just below the current vma->vm_start */
+                addr = vma->vm_start - len;
+                if (do_align)
+                        addr = COLOUR_ALIGN_DOWN(addr, pgoff);
+        } while (len < vma->vm_start);
+
+bottomup:
+        /*
+         * A failed mmap() very likely causes application failure,
+         * so fall back to the bottom-up function here. This scenario
+         * can happen with large stack limits and large mmap()
+         * allocations.
+         */
+        mm->cached_hole_size = ~0UL;
+        mm->free_area_cache = TASK_UNMAPPED_BASE;
+        addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
+        /*
+         * Restore the topdown base:
+         */
+        mm->free_area_cache = mm->mmap_base;
+        mm->cached_hole_size = ~0UL;
+
+        return addr;
+}
+
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+        unsigned long random_factor = 0UL;
+
+        /* 8 bits of randomness in 20 address space bits */
+        if ((current->flags & PF_RANDOMIZE) &&
+            !(current->personality & ADDR_NO_RANDOMIZE))
+                random_factor = (get_random_int() % (1 << 8)) << PAGE_SHIFT;
+
+        if (mmap_is_legacy()) {
+                mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
+                mm->get_unmapped_area = arch_get_unmapped_area;
+                mm->unmap_area = arch_unmap_area;
+        } else {
+                mm->mmap_base = mmap_base(random_factor);
+                mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+                mm->unmap_area = arch_unmap_area_topdown;
+        }
+}
+
 /*
  * You really shouldn't be using read() or write() on /dev/mem.  This
--
cgit v1.2.3
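The Launchpad link in the commit message points at the mmap test used for
verification; it is not reproduced here. As a rough illustration of what the
top-down layout changes (a sketch only, not the referenced test; the 2.5GB and
256MB sizes are arbitrary choices, not from the patch), the user-space program
below first tries a single anonymous mapping larger than 2GB and then totals
up 256MB mappings until mmap() fails. With the legacy bottom-up layout,
searches start at TASK_UNMAPPED_BASE (about 1GB with the usual 3GB ARM user
split), so a single mapping cannot exceed roughly 2GB; with the top-down
layout the allocator works down from just below the stack gap and falls back
to the low area, so both figures can grow well past that. Exact numbers depend
on TASK_SIZE, the stack rlimit and pre-existing mappings.

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
        size_t big = 2560UL << 20;      /* 2.5GB: expected to fail bottom-up */
        size_t chunk = 256UL << 20;     /* 256MB per mapping */
        size_t total = 0;
        void *p;

        /* one large anonymous mapping, > 2GB */
        p = mmap(NULL, big, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        printf("single 2.5GB mapping: %s\n",
               p == MAP_FAILED ? "failed" : "ok");
        if (p != MAP_FAILED)
                munmap(p, big);

        /* total up smaller mappings until the address space is exhausted */
        while ((p = mmap(NULL, chunk, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)) != MAP_FAILED)
                total += chunk;

        printf("total anonymous mappings: %zu MB\n", total >> 20);
        return 0;
}

To compare both layouts on one kernel, the legacy layout can be forced per
process via the ADDR_COMPAT_LAYOUT personality (for example with setarch's
-L/--addr-compat-layout switch), which is exactly what the new
mmap_is_legacy() above checks.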
From 8878a539ff19a43cf3729e7562cd528f490246ae Mon Sep 17 00:00:00 2001
From: Kautuk Consul
Date: Sun, 27 Nov 2011 17:49:50 +0100
Subject: ARM: 7178/1: fault.c: Port OOM changes into do_page_fault

Commits d065bd810b6deb67d4897a14bfe21f8eb526ba99 (mm: retry page fault
when blocking on disk transfer) and
37b23e0525d393d48a7d59f870b3bc061a30ccdb (x86,mm: make pagefault
killable) made the x86 pagefault handler retryable as well as killable.

These changes reduce the mmap_sem hold time, which is crucial during
OOM killer invocation.

Port these changes to ARM.

Without these changes, my ARM board encounters many hang and livelock
scenarios. After applying this patch, OOM handling improves in my
testing.

Signed-off-by: Kautuk Consul
Signed-off-by: Russell King
---
 arch/arm/mm/fault.c | 58 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 39 insertions(+), 19 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index aa33949fef60..4aabeaec25df 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -231,7 +231,7 @@ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
 
 static int __kprobes
 __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
-                struct task_struct *tsk)
+                unsigned int flags, struct task_struct *tsk)
 {
         struct vm_area_struct *vma;
         int fault;
@@ -253,18 +253,7 @@ good_area:
                 goto out;
         }
 
-        /*
-         * If for any reason at all we couldn't handle the fault, make
-         * sure we exit gracefully rather than endlessly redo the fault.
-         */
-        fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, (fsr & FSR_WRITE) ? FAULT_FLAG_WRITE : 0);
-        if (unlikely(fault & VM_FAULT_ERROR))
-                return fault;
-        if (fault & VM_FAULT_MAJOR)
-                tsk->maj_flt++;
-        else
-                tsk->min_flt++;
-        return fault;
+        return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags);
 
 check_stack:
         if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
@@ -279,6 +268,9 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
         struct task_struct *tsk;
         struct mm_struct *mm;
         int fault, sig, code;
+        int write = fsr & FSR_WRITE;
+        unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
+                                (write ? FAULT_FLAG_WRITE : 0);
 
         if (notify_page_fault(regs, fsr))
                 return 0;
@@ -305,6 +297,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
         if (!down_read_trylock(&mm->mmap_sem)) {
                 if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
                         goto no_context;
+retry:
                 down_read(&mm->mmap_sem);
         } else {
                 /*
@@ -320,14 +313,41 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 #endif
         }
 
-        fault = __do_page_fault(mm, addr, fsr, tsk);
-        up_read(&mm->mmap_sem);
+        fault = __do_page_fault(mm, addr, fsr, flags, tsk);
+
+        /* If we need to retry but a fatal signal is pending, handle the
+         * signal first. We do not need to release the mmap_sem because
+         * it would already be released in __lock_page_or_retry in
+         * mm/filemap.c. */
+        if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+                return 0;
+
+        /*
+         * Major/minor page fault accounting is only done on the
+         * initial attempt. If we go through a retry, it is extremely
+         * likely that the page will be found in page cache at that point.
+         */
         perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
-        if (fault & VM_FAULT_MAJOR)
-                perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr);
-        else if (fault & VM_FAULT_MINOR)
-                perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr);
+        if (flags & FAULT_FLAG_ALLOW_RETRY) {
+                if (fault & VM_FAULT_MAJOR) {
+                        tsk->maj_flt++;
+                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
+                                        regs, addr);
+                } else {
+                        tsk->min_flt++;
+                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
+                                        regs, addr);
+                }
+                if (fault & VM_FAULT_RETRY) {
+                        /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
+                         * of starvation. */
+                        flags &= ~FAULT_FLAG_ALLOW_RETRY;
+                        goto retry;
+                }
+        }
+
+        up_read(&mm->mmap_sem);
 
         /*
          * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
--
cgit v1.2.3

From b4244738d20a631cd27fa105a2db71622618ab4e Mon Sep 17 00:00:00 2001
From: Pawel Moll
Date: Fri, 9 Dec 2011 20:00:39 +0100
Subject: ARM: 7202/1: Add Cortex-A7 proc info

This patch adds processor info for ARM Ltd. Cortex-A7. A7 is
architecturally identical to A15, so it shares the same SMP
initialization code and hwcaps.

Tested-by: Will Deacon
Signed-off-by: Pawel Moll
Signed-off-by: Russell King
---
 arch/arm/mm/proc-v7.S | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 2c559ac38142..ea679ec19651 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -284,6 +284,7 @@ __v7_ca5mp_setup:
 __v7_ca9mp_setup:
         mov     r10, #(1 << 0)                  @ TLB ops broadcasting
         b       1f
+__v7_ca7mp_setup:
 __v7_ca15mp_setup:
         mov     r10, #0
 1:
@@ -462,6 +463,16 @@ __v7_ca5mp_proc_info:
         __v7_proc __v7_ca5mp_setup
         .size   __v7_ca5mp_proc_info, . - __v7_ca5mp_proc_info
 
+        /*
+         * ARM Ltd. Cortex A7 processor.
+         */
+        .type   __v7_ca7mp_proc_info, #object
+__v7_ca7mp_proc_info:
+        .long   0x410fc070
+        .long   0xff0ffff0
+        __v7_proc __v7_ca7mp_setup, hwcaps = HWCAP_IDIV
+        .size   __v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info
+
         /*
          * ARM Ltd. Cortex A9 processor.
          */
--
cgit v1.2.3
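The two .long values in the new proc_info entry form a CPU ID value/mask
pair: at boot the kernel masks the MIDR and compares the result against each
entry's value while scanning the proc_info table, so the variant and revision
fields (cleared by the 0xff0ffff0 mask) are ignored and any Cortex-A7 revision
selects this entry. The fragment below is only an illustrative user-space
sketch of that match; the kernel does the comparison in assembly, and the
helper name and example MIDR values are invented for demonstration, not part
of the patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CA7_VAL  0x410fc070u    /* implementer 0x41 (ARM), part number 0xc07 */
#define CA7_MASK 0xff0ffff0u    /* variant [23:20] and revision [3:0] ignored */

/* same mask-and-compare the proc_info lookup performs on the MIDR */
static bool is_cortex_a7(uint32_t midr)
{
        return (midr & CA7_MASK) == CA7_VAL;
}

int main(void)
{
        printf("r0p3 MIDR 0x410fc073: %d\n", is_cortex_a7(0x410fc073)); /* 1 */
        printf("r2p1 MIDR 0x412fc071: %d\n", is_cortex_a7(0x412fc071)); /* 1 */
        printf("A15  MIDR 0x412fc0f0: %d\n", is_cortex_a7(0x412fc0f0)); /* 0 */
        return 0;
}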