From 28ccf7f91b1ac42ee1f18480a69d2a7486b625ce Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 27 Sep 2006 18:30:07 +0900 Subject: sh: Selective flush_cache_mm() flushing. flush_cache_mm() wraps in to flush_cache_all(), which is rather excessive given that the number of PTEs within the specified context are generally quite low. Optimize for walking the mm's VMA list and selectively flushing the VMA ranges from the dcache. Invalidate the icache only if a VMA sets VM_EXEC. Signed-off-by: Paul Mundt --- arch/sh/mm/cache-sh4.c | 220 +++++++++++++++++++++++++++++-------------------- 1 file changed, 130 insertions(+), 90 deletions(-) diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index 2203bd6aadb3..aa4f62f0e374 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c @@ -2,29 +2,31 @@ * arch/sh/mm/cache-sh4.c * * Copyright (C) 1999, 2000, 2002 Niibe Yutaka - * Copyright (C) 2001, 2002, 2003, 2004, 2005 Paul Mundt + * Copyright (C) 2001 - 2006 Paul Mundt * Copyright (C) 2003 Richard Curnow * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. */ - #include -#include #include -#include #include -#include #include #include #include #include -#include #include #include #include +/* + * The maximum number of pages we support up to when doing ranged dcache + * flushing. Anything exceeding this will simply flush the dcache in its + * entirety. + */ +#define MAX_DCACHE_PAGES 64 /* XXX: Tune for ways */ + static void __flush_dcache_segment_1way(unsigned long start, unsigned long extent); static void __flush_dcache_segment_2way(unsigned long start, @@ -219,14 +221,14 @@ void flush_cache_sigtramp(unsigned long addr) static inline void flush_cache_4096(unsigned long start, unsigned long phys) { - unsigned long flags; - /* * All types of SH-4 require PC to be in P2 to operate on the I-cache. * Some types of SH-4 require PC to be in P2 to operate on the D-cache. */ - if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG) - || start < CACHE_OC_ADDRESS_ARRAY) { + if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG) || + (start < CACHE_OC_ADDRESS_ARRAY)) { + unsigned long flags; + local_irq_save(flags); __flush_cache_4096(start | SH_CACHE_ASSOC, P1SEGADDR(phys), 0x20000000); @@ -257,6 +259,7 @@ void flush_dcache_page(struct page *page) wmb(); } +/* TODO: Selective icache invalidation through IC address array.. */ static inline void flush_icache_all(void) { unsigned long flags, ccr; @@ -290,19 +293,121 @@ void flush_cache_all(void) flush_icache_all(); } +static void __flush_cache_mm(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + unsigned long d = 0, p = start & PAGE_MASK; + unsigned long alias_mask = cpu_data->dcache.alias_mask; + unsigned long n_aliases = cpu_data->dcache.n_aliases; + unsigned long select_bit; + unsigned long all_aliases_mask; + unsigned long addr_offset; + pgd_t *dir; + pmd_t *pmd; + pud_t *pud; + pte_t *pte; + int i; + + dir = pgd_offset(mm, p); + pud = pud_offset(dir, p); + pmd = pmd_offset(pud, p); + end = PAGE_ALIGN(end); + + all_aliases_mask = (1 << n_aliases) - 1; + + do { + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) { + p &= PMD_MASK; + p += PMD_SIZE; + pmd++; + + continue; + } + + pte = pte_offset_kernel(pmd, p); + + do { + unsigned long phys; + pte_t entry = *pte; + + if (!(pte_val(entry) & _PAGE_PRESENT)) { + pte++; + p += PAGE_SIZE; + continue; + } + + phys = pte_val(entry) & PTE_PHYS_MASK; + + if ((p ^ phys) & alias_mask) { + d |= 1 << ((p & alias_mask) >> PAGE_SHIFT); + d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT); + + if (d == all_aliases_mask) + goto loop_exit; + } + + pte++; + p += PAGE_SIZE; + } while (p < end && ((unsigned long)pte & ~PAGE_MASK)); + pmd++; + } while (p < end); + +loop_exit: + addr_offset = 0; + select_bit = 1; + + for (i = 0; i < n_aliases; i++) { + if (d & select_bit) { + (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE); + wmb(); + } + + select_bit <<= 1; + addr_offset += PAGE_SIZE; + } +} + +/* + * Note : (RPC) since the caches are physically tagged, the only point + * of flush_cache_mm for SH-4 is to get rid of aliases from the + * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that + * lines can stay resident so long as the virtual address they were + * accessed with (hence cache set) is in accord with the physical + * address (i.e. tag). It's no different here. So I reckon we don't + * need to flush the I-cache, since aliases don't matter for that. We + * should try that. + * + * Caller takes mm->mmap_sem. + */ void flush_cache_mm(struct mm_struct *mm) { /* - * Note : (RPC) since the caches are physically tagged, the only point - * of flush_cache_mm for SH-4 is to get rid of aliases from the - * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that - * lines can stay resident so long as the virtual address they were - * accessed with (hence cache set) is in accord with the physical - * address (i.e. tag). It's no different here. So I reckon we don't - * need to flush the I-cache, since aliases don't matter for that. We - * should try that. + * If cache is only 4k-per-way, there are never any 'aliases'. Since + * the cache is physically tagged, the data can just be left in there. + */ + if (cpu_data->dcache.n_aliases == 0) + return; + + /* + * Don't bother groveling around the dcache for the VMA ranges + * if there are too many PTEs to make it worthwhile. */ - flush_cache_all(); + if (mm->nr_ptes >= MAX_DCACHE_PAGES) + flush_dcache_all(); + else { + struct vm_area_struct *vma; + + /* + * In this case there are reasonably sized ranges to flush, + * iterate through the VMA list and take care of any aliases. + */ + for (vma = mm->mmap; vma; vma = vma->vm_next) + __flush_cache_mm(mm, vma->vm_start, vma->vm_end); + } + + /* Only touch the icache if one of the VMAs has VM_EXEC set. */ + if (mm->exec_vm) + flush_icache_all(); } /* @@ -311,7 +416,8 @@ void flush_cache_mm(struct mm_struct *mm) * ADDR: Virtual Address (U0 address) * PFN: Physical page number */ -void flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigned long pfn) +void flush_cache_page(struct vm_area_struct *vma, unsigned long address, + unsigned long pfn) { unsigned long phys = pfn << PAGE_SHIFT; unsigned int alias_mask; @@ -358,87 +464,22 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigne void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - unsigned long d = 0, p = start & PAGE_MASK; - unsigned long alias_mask = cpu_data->dcache.alias_mask; - unsigned long n_aliases = cpu_data->dcache.n_aliases; - unsigned long select_bit; - unsigned long all_aliases_mask; - unsigned long addr_offset; - unsigned long phys; - pgd_t *dir; - pmd_t *pmd; - pud_t *pud; - pte_t *pte; - pte_t entry; - int i; - /* * If cache is only 4k-per-way, there are never any 'aliases'. Since * the cache is physically tagged, the data can just be left in there. */ - if (n_aliases == 0) + if (cpu_data->dcache.n_aliases == 0) return; - all_aliases_mask = (1 << n_aliases) - 1; - /* * Don't bother with the lookup and alias check if we have a * wide range to cover, just blow away the dcache in its * entirety instead. -- PFM. */ - if (((end - start) >> PAGE_SHIFT) >= 64) { + if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES) flush_dcache_all(); - - if (vma->vm_flags & VM_EXEC) - flush_icache_all(); - - return; - } - - dir = pgd_offset(vma->vm_mm, p); - pud = pud_offset(dir, p); - pmd = pmd_offset(pud, p); - end = PAGE_ALIGN(end); - - do { - if (pmd_none(*pmd) || pmd_bad(*pmd)) { - p &= ~((1 << PMD_SHIFT) - 1); - p += (1 << PMD_SHIFT); - pmd++; - - continue; - } - - pte = pte_offset_kernel(pmd, p); - - do { - entry = *pte; - - if ((pte_val(entry) & _PAGE_PRESENT)) { - phys = pte_val(entry) & PTE_PHYS_MASK; - - if ((p ^ phys) & alias_mask) { - d |= 1 << ((p & alias_mask) >> PAGE_SHIFT); - d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT); - - if (d == all_aliases_mask) - goto loop_exit; - } - } - - pte++; - p += PAGE_SIZE; - } while (p < end && ((unsigned long)pte & ~PAGE_MASK)); - pmd++; - } while (p < end); - -loop_exit: - for (i = 0, select_bit = 0x1, addr_offset = 0x0; i < n_aliases; - i++, select_bit <<= 1, addr_offset += PAGE_SIZE) - if (d & select_bit) { - (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE); - wmb(); - } + else + __flush_cache_mm(vma->vm_mm, start, end); if (vma->vm_flags & VM_EXEC) { /* @@ -731,4 +772,3 @@ static void __flush_dcache_segment_4way(unsigned long start, a3 += linesz; } while (a0 < a0e); } - -- cgit v1.2.3