diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/mach-mvebu/coherency.c | 62 | ||||
-rw-r--r-- | arch/arm/mach-mvebu/coherency_ll.S | 77 |
2 files changed, 106 insertions, 33 deletions
diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index d5a975b6a590..477202fd39cc 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -29,8 +29,10 @@ #include <linux/slab.h> #include <linux/mbus.h> #include <linux/clk.h> +#include <linux/pci.h> #include <asm/smp_plat.h> #include <asm/cacheflush.h> +#include <asm/mach/map.h> #include "armada-370-xp.h" #include "coherency.h" #include "mvebu-soc-id.h" @@ -274,8 +276,8 @@ static struct dma_map_ops mvebu_hwcc_dma_ops = { .set_dma_mask = arm_dma_set_mask, }; -static int mvebu_hwcc_platform_notifier(struct notifier_block *nb, - unsigned long event, void *__dev) +static int mvebu_hwcc_notifier(struct notifier_block *nb, + unsigned long event, void *__dev) { struct device *dev = __dev; @@ -286,8 +288,8 @@ static int mvebu_hwcc_platform_notifier(struct notifier_block *nb, return NOTIFY_OK; } -static struct notifier_block mvebu_hwcc_platform_nb = { - .notifier_call = mvebu_hwcc_platform_notifier, +static struct notifier_block mvebu_hwcc_nb = { + .notifier_call = mvebu_hwcc_notifier, }; static void __init armada_370_coherency_init(struct device_node *np) @@ -308,9 +310,47 @@ static void __init armada_370_coherency_init(struct device_node *np) set_cpu_coherent(); } +/* + * This ioremap hook is used on Armada 375/38x to ensure that PCIe + * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This + * is needed as a workaround for a deadlock issue between the PCIe + * interface and the cache controller. 
+ */ +static void __iomem * +armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size, + unsigned int mtype, void *caller) +{ + struct resource pcie_mem; + + mvebu_mbus_get_pcie_mem_aperture(&pcie_mem); + + if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end) + mtype = MT_UNCACHED; + + return __arm_ioremap_caller(phys_addr, size, mtype, caller); +} + static void __init armada_375_380_coherency_init(struct device_node *np) { + struct device_node *cache_dn; + coherency_cpu_base = of_iomap(np, 0); + arch_ioremap_caller = armada_pcie_wa_ioremap_caller; + + /* + * Add the PL310 property "arm,io-coherent". This makes sure the + * outer sync operation is not used, which allows us to + * work around the system erratum that causes deadlocks when + * doing PCIe in an SMP situation on Armada 375 and Armada + * 38x. + */ + for_each_compatible_node(cache_dn, NULL, "arm,pl310-cache") { + struct property *p; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + p->name = kstrdup("arm,io-coherent", GFP_KERNEL); + of_add_property(cache_dn, p); + } } static int coherency_type(void) @@ -375,9 +415,21 @@ static int __init coherency_late_init(void) } bus_register_notifier(&platform_bus_type, - &mvebu_hwcc_platform_nb); + &mvebu_hwcc_nb); return 0; } postcore_initcall(coherency_late_init); + +#if IS_ENABLED(CONFIG_PCI) +static int __init coherency_pci_init(void) +{ + if (coherency_available()) + bus_register_notifier(&pci_bus_type, + &mvebu_hwcc_nb); + return 0; +} + +arch_initcall(coherency_pci_init); +#endif diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index 6828f9f157b0..510c29e079ca 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S @@ -24,52 +24,69 @@ #include <asm/cp15.h> .text -/* Returns with the coherency address in r1 (r0 is untouched)*/ +/* Returns the coherency base address in r1 (r0 is untouched) */ ENTRY(ll_get_coherency_base) mrc p15, 0, r1, c1, c0, 0 tst r1, #CR_M @ Check MMU bit 
enabled bne 1f - /* use physical address of the coherency register */ + /* + * MMU is disabled, use the physical address of the coherency + * base address. + */ adr r1, 3f ldr r3, [r1] ldr r1, [r1, r3] b 2f 1: - /* use virtual address of the coherency register */ + /* + * MMU is enabled, use the virtual address of the coherency + * base address. + */ ldr r1, =coherency_base ldr r1, [r1] 2: mov pc, lr ENDPROC(ll_get_coherency_base) -/* Returns with the CPU ID in r3 (r0 is untouched)*/ -ENTRY(ll_get_cpuid) +/* + * Returns the coherency CPU mask in r3 (r0 is untouched). This + * coherency CPU mask can be used with the coherency fabric + * configuration and control registers. Note that the mask is already + * endian-swapped as appropriate so that the calling functions do not + * have to care about endianness issues while accessing the coherency + * fabric registers. + */ +ENTRY(ll_get_coherency_cpumask) mrc 15, 0, r3, cr0, cr0, 5 and r3, r3, #15 mov r2, #(1 << 24) lsl r3, r2, r3 -ARM_BE8(rev r1, r1) +ARM_BE8(rev r3, r3) mov pc, lr -ENDPROC(ll_get_cpuid) +ENDPROC(ll_get_coherency_cpumask) -/* ll_add_cpu_to_smp_group, ll_enable_coherency and - * ll_disable_coherency use strex/ldrex whereas MMU can be off. The - * Armada XP SoC has an exclusive monitor that can track transactions - * to Device and/or SO and as such also when MMU is disabled the - * exclusive transactions will be functional +/* + * ll_add_cpu_to_smp_group(), ll_enable_coherency() and + * ll_disable_coherency() use the strex/ldrex instructions while the + * MMU can be disabled. The Armada XP SoC has an exclusive monitor + * that tracks transactions to Device and/or SO memory and thanks to + * that, exclusive transactions are functional even when the MMU is + * disabled. 
*/ ENTRY(ll_add_cpu_to_smp_group) /* - * r0 being untouched in ll_get_coherency_base and - * ll_get_cpuid, we can use it to save lr modifing it with the - * following bl + * As r0 is not modified by ll_get_coherency_base() and + * ll_get_coherency_cpumask(), we use it to temporarily save lr + * and avoid it being modified by the branch and link + * calls. This function is used very early in the secondary + * CPU boot, and no stack is available at this point. */ - mov r0, lr + mov r0, lr bl ll_get_coherency_base - bl ll_get_cpuid - mov lr, r0 + bl ll_get_coherency_cpumask + mov lr, r0 add r0, r1, #ARMADA_XP_CFB_CFG_REG_OFFSET 1: ldrex r2, [r0] @@ -82,13 +99,15 @@ ENDPROC(ll_add_cpu_to_smp_group) ENTRY(ll_enable_coherency) /* - * r0 being untouched in ll_get_coherency_base and - * ll_get_cpuid, we can use it to save lr modifing it with the - * following bl + * As r0 is not modified by ll_get_coherency_base() and + * ll_get_coherency_cpumask(), we use it to temporarily save lr + * and avoid it being modified by the branch and link + * calls. This function is used very early in the secondary + * CPU boot, and no stack is available at this point. */ mov r0, lr bl ll_get_coherency_base - bl ll_get_cpuid + bl ll_get_coherency_cpumask mov lr, r0 add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET 1: @@ -104,14 +123,16 @@ ENDPROC(ll_enable_coherency) ENTRY(ll_disable_coherency) /* - * r0 being untouched in ll_get_coherency_base and - * ll_get_cpuid, we can use it to save lr modifing it with the - * following bl + * As r0 is not modified by ll_get_coherency_base() and + * ll_get_coherency_cpumask(), we use it to temporarily save lr + * and avoid it being modified by the branch and link + * calls. This function is used very early in the secondary + * CPU boot, and no stack is available at this point. */ - mov r0, lr + mov r0, lr bl ll_get_coherency_base - bl ll_get_cpuid - mov lr, r0 + bl ll_get_coherency_cpumask + mov lr, r0 add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET 1: ldrex r2, [r0] |