diff options
author | Arnd Bergmann <arnd@arndb.de> | 2015-01-28 17:55:31 +0100 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2015-02-06 21:16:40 +0100 |
commit | 1d88967900b87f94435581dad4ae319686c6ce10 (patch) | |
tree | 7fc8d85ca097dfdb6a49276061ffb1466723ef0c /arch/arm/mm | |
parent | ARM: 8296/1: cache-l2x0: clean up aurora cache handling (diff) | |
download | linux-1d88967900b87f94435581dad4ae319686c6ce10.tar.xz linux-1d88967900b87f94435581dad4ae319686c6ce10.zip |
ARM: 8297/1: cache-l2x0: optimize aurora range operations
The aurora_inv_range(), aurora_clean_range() and aurora_flush_range()
functions are highly redundant, both in source and in object code, and
they are harder to understand than necessary.
By moving the range loop into the aurora_pa_range() function, they
become trivial wrappers, and the object code start looking like what
one would expect for an optimal implementation.
Further optimization may be possible by using the per-CPU "virtual"
registers to avoid the spinlocks in most cases.
(on Armada 370 RD and Armada XP GP, boot tested, plus a little bit of
DMA traffic by reading data from a SD card)
Reviewed-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Tested-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/mm')
-rw-r--r-- | arch/arm/mm/cache-l2x0.c | 68 |
1 files changed, 22 insertions, 46 deletions
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 404c598da27d..5ea2d6d417f7 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -1256,7 +1256,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = { * noninclusive, while the hardware cache range operations use * inclusive start and end addresses. */ -static unsigned long calc_range_end(unsigned long start, unsigned long end) +static unsigned long aurora_range_end(unsigned long start, unsigned long end) { /* * Limit the number of cache lines processed at once, @@ -1275,26 +1275,13 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end) return end; } -/* - * Make sure 'start' and 'end' reference the same page, as L2 is PIPT - * and range operations only do a TLB lookup on the start address. - */ static void aurora_pa_range(unsigned long start, unsigned long end, - unsigned long offset) + unsigned long offset) { void __iomem *base = l2x0_base; + unsigned long range_end; unsigned long flags; - raw_spin_lock_irqsave(&l2x0_lock, flags); - writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG); - writel_relaxed(end, base + offset); - raw_spin_unlock_irqrestore(&l2x0_lock, flags); - - writel_relaxed(0, base + AURORA_SYNC_REG); -} - -static void aurora_inv_range(unsigned long start, unsigned long end) -{ /* * round start and end adresses up to cache line size */ @@ -1302,15 +1289,24 @@ static void aurora_inv_range(unsigned long start, unsigned long end) end = ALIGN(end, CACHE_LINE_SIZE); /* - * Invalidate all full cache lines between 'start' and 'end'. + * perform operation on all full cache lines between 'start' and 'end' */ while (start < end) { - unsigned long range_end = calc_range_end(start, end); - aurora_pa_range(start, range_end - CACHE_LINE_SIZE, - AURORA_INVAL_RANGE_REG); + range_end = aurora_range_end(start, end); + + raw_spin_lock_irqsave(&l2x0_lock, flags); + writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG); + writel_relaxed(range_end - CACHE_LINE_SIZE, base + offset); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + + writel_relaxed(0, base + AURORA_SYNC_REG); start = range_end; } } +static void aurora_inv_range(unsigned long start, unsigned long end) +{ + aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG); +} static void aurora_clean_range(unsigned long start, unsigned long end) { @@ -1318,36 +1314,16 @@ static void aurora_clean_range(unsigned long start, unsigned long end) * If L2 is forced to WT, the L2 will always be clean and we * don't need to do anything here. */ - if (!l2_wt_override) { - start &= ~(CACHE_LINE_SIZE - 1); - end = ALIGN(end, CACHE_LINE_SIZE); - while (start != end) { - unsigned long range_end = calc_range_end(start, end); - aurora_pa_range(start, range_end - CACHE_LINE_SIZE, - AURORA_CLEAN_RANGE_REG); - start = range_end; - } - } + if (!l2_wt_override) + aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG); } static void aurora_flush_range(unsigned long start, unsigned long end) { - start &= ~(CACHE_LINE_SIZE - 1); - end = ALIGN(end, CACHE_LINE_SIZE); - while (start != end) { - unsigned long range_end = calc_range_end(start, end); - /* - * If L2 is forced to WT, the L2 will always be clean and we - * just need to invalidate. - */ - if (l2_wt_override) - aurora_pa_range(start, range_end - CACHE_LINE_SIZE, - AURORA_INVAL_RANGE_REG); - else - aurora_pa_range(start, range_end - CACHE_LINE_SIZE, - AURORA_FLUSH_RANGE_REG); - start = range_end; - } + if (l2_wt_override) + aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG); + else + aurora_pa_range(start, end, AURORA_FLUSH_RANGE_REG); } static void aurora_flush_all(void) |