diff options
-rw-r--r-- | arch/microblaze/include/asm/dma-mapping.h | 20 | ||||
-rw-r--r-- | arch/microblaze/include/asm/elf.h | 8 | ||||
-rw-r--r-- | arch/microblaze/include/asm/system.h | 9 | ||||
-rw-r--r-- | arch/microblaze/include/asm/uaccess.h | 2 | ||||
-rw-r--r-- | arch/microblaze/kernel/cpu/cpuinfo.c | 1 | ||||
-rw-r--r-- | arch/microblaze/kernel/dma.c | 82 | ||||
-rw-r--r-- | arch/microblaze/kernel/exceptions.c | 2 | ||||
-rw-r--r-- | arch/microblaze/kernel/process.c | 1 | ||||
-rw-r--r-- | arch/microblaze/kernel/ptrace.c | 2 | ||||
-rw-r--r-- | arch/microblaze/kernel/timer.c | 3 | ||||
-rw-r--r-- | arch/microblaze/lib/Makefile | 1 | ||||
-rw-r--r-- | arch/microblaze/lib/uaccess_old.S | 123 | ||||
-rw-r--r-- | arch/microblaze/lib/ucmpdi2.c | 20 |
13 files changed, 220 insertions, 54 deletions
diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h index a569514cf19f..3a3e5b886854 100644 --- a/arch/microblaze/include/asm/dma-mapping.h +++ b/arch/microblaze/include/asm/dma-mapping.h @@ -28,12 +28,12 @@ #include <linux/dma-attrs.h> #include <asm/io.h> #include <asm-generic/dma-coherent.h> +#include <asm/cacheflush.h> #define DMA_ERROR_CODE (~(dma_addr_t)0x0) #define __dma_alloc_coherent(dev, gfp, size, handle) NULL #define __dma_free_coherent(size, addr) ((void)0) -#define __dma_sync(addr, size, rw) ((void)0) static inline unsigned long device_to_mask(struct device *dev) { @@ -95,6 +95,22 @@ static inline int dma_set_mask(struct device *dev, u64 dma_mask) #include <asm-generic/dma-mapping-common.h> +static inline void __dma_sync(unsigned long paddr, + size_t size, enum dma_data_direction direction) +{ + switch (direction) { + case DMA_TO_DEVICE: + case DMA_BIDIRECTIONAL: + flush_dcache_range(paddr, paddr + size); + break; + case DMA_FROM_DEVICE: + invalidate_dcache_range(paddr, paddr + size); + break; + default: + BUG(); + } +} + static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { struct dma_map_ops *ops = get_dma_ops(dev); @@ -135,7 +151,7 @@ static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { BUG_ON(direction == DMA_NONE); - __dma_sync(vaddr, size, (int)direction); + __dma_sync(virt_to_phys(vaddr), size, (int)direction); } #endif /* _ASM_MICROBLAZE_DMA_MAPPING_H */ diff --git a/arch/microblaze/include/asm/elf.h b/arch/microblaze/include/asm/elf.h index 098dfdde4b06..834849f59ae8 100644 --- a/arch/microblaze/include/asm/elf.h +++ b/arch/microblaze/include/asm/elf.h @@ -16,13 +16,15 @@ * I've snaffled the value from the microblaze binutils source code * /binutils/microblaze/include/elf/microblaze.h */ -#define EM_XILINX_MICROBLAZE 0xbaab -#define ELF_ARCH EM_XILINX_MICROBLAZE +#define EM_MICROBLAZE 189 +#define EM_MICROBLAZE_OLD 0xbaab +#define ELF_ARCH EM_MICROBLAZE /* * This is used to ensure we don't load something for the wrong architecture. */ -#define elf_check_arch(x) ((x)->e_machine == EM_XILINX_MICROBLAZE) +#define elf_check_arch(x) ((x)->e_machine == EM_MICROBLAZE \ + || (x)->e_machine == EM_MICROBLAZE_OLD) /* * These are used to set parameters in the core dumps. diff --git a/arch/microblaze/include/asm/system.h b/arch/microblaze/include/asm/system.h index e6a2284571dc..5a433cbaafb3 100644 --- a/arch/microblaze/include/asm/system.h +++ b/arch/microblaze/include/asm/system.h @@ -17,8 +17,6 @@ #include <asm-generic/cmpxchg.h> #include <asm-generic/cmpxchg-local.h> -#define __ARCH_WANT_INTERRUPTS_ON_CTXSW - struct task_struct; struct thread_info; @@ -96,11 +94,4 @@ extern struct dentry *of_debugfs_root; #define arch_align_stack(x) (x) -/* - * MicroBlaze doesn't handle unaligned accesses in hardware. - * - * Based on this we force the IP header alignment in network drivers. - */ -#define NET_IP_ALIGN 2 - #endif /* _ASM_MICROBLAZE_SYSTEM_H */ diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 5bb95a11880d..072b0077abf9 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -95,7 +95,7 @@ static inline int ___range_ok(unsigned long addr, unsigned long size) * - "addr", "addr + size" and "size" are all below the limit */ #define access_ok(type, addr, size) \ - (get_fs().seg > (((unsigned long)(addr)) | \ + (get_fs().seg >= (((unsigned long)(addr)) | \ (size) | ((unsigned long)(addr) + (size)))) /* || printk("access_ok failed for %s at 0x%08lx (size %d), seg 0x%08x\n", diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c index 44394d80a683..54194b28574a 100644 --- a/arch/microblaze/kernel/cpu/cpuinfo.c +++ b/arch/microblaze/kernel/cpu/cpuinfo.c @@ -34,6 +34,7 @@ const struct cpu_ver_key cpu_ver_lookup[] = { {"8.00.a", 0x12}, {"8.00.b", 0x13}, {"8.10.a", 0x14}, + {"8.20.a", 0x15}, {NULL, 0}, }; diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c index 393e6b2db688..dc6416d265d6 100644 --- a/arch/microblaze/kernel/dma.c +++ b/arch/microblaze/kernel/dma.c @@ -11,7 +11,6 @@ #include <linux/gfp.h> #include <linux/dma-debug.h> #include <asm/bug.h> -#include <asm/cacheflush.h> /* * Generic direct DMA implementation @@ -21,21 +20,6 @@ * can set archdata.dma_data to an unsigned long holding the offset. By * default the offset is PCI_DRAM_OFFSET. */ -static inline void __dma_sync_page(unsigned long paddr, unsigned long offset, - size_t size, enum dma_data_direction direction) -{ - switch (direction) { - case DMA_TO_DEVICE: - case DMA_BIDIRECTIONAL: - flush_dcache_range(paddr + offset, paddr + offset + size); - break; - case DMA_FROM_DEVICE: - invalidate_dcache_range(paddr + offset, paddr + offset + size); - break; - default: - BUG(); - } -} static unsigned long get_dma_direct_offset(struct device *dev) { @@ -91,7 +75,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, /* FIXME this part of code is untested */ for_each_sg(sgl, sg, nents, i) { sg->dma_address = sg_phys(sg) + get_dma_direct_offset(dev); - __dma_sync_page(page_to_phys(sg_page(sg)), sg->offset, + __dma_sync(page_to_phys(sg_page(sg)) + sg->offset, sg->length, direction); } @@ -116,7 +100,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev, enum dma_data_direction direction, struct dma_attrs *attrs) { - __dma_sync_page(page_to_phys(page), offset, size, direction); + __dma_sync(page_to_phys(page) + offset, size, direction); return page_to_phys(page) + offset + get_dma_direct_offset(dev); } @@ -131,7 +115,63 @@ static inline void dma_direct_unmap_page(struct device *dev, * phys_to_virt is here because in __dma_sync_page is __virt_to_phys and * dma_address is physical address */ - __dma_sync_page(dma_address, 0 , size, direction); + __dma_sync(dma_address, size, direction); +} + +static inline void +dma_direct_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction) +{ + /* + * It's pointless to flush the cache as the memory segment + * is given to the CPU + */ + + if (direction == DMA_FROM_DEVICE) + __dma_sync(dma_handle, size, direction); +} + +static inline void +dma_direct_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction) +{ + /* + * It's pointless to invalidate the cache if the device isn't + * supposed to write to the relevant region + */ + + if (direction == DMA_TO_DEVICE) + __dma_sync(dma_handle, size, direction); +} + +static inline void +dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, + enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + /* FIXME this part of code is untested */ + if (direction == DMA_FROM_DEVICE) + for_each_sg(sgl, sg, nents, i) + __dma_sync(sg->dma_address, sg->length, direction); +} + +static inline void +dma_direct_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, + enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + /* FIXME this part of code is untested */ + if (direction == DMA_TO_DEVICE) + for_each_sg(sgl, sg, nents, i) + __dma_sync(sg->dma_address, sg->length, direction); } struct dma_map_ops dma_direct_ops = { @@ -142,6 +182,10 @@ struct dma_map_ops dma_direct_ops = { .dma_supported = dma_direct_dma_supported, .map_page = dma_direct_map_page, .unmap_page = dma_direct_unmap_page, + .sync_single_for_cpu = dma_direct_sync_single_for_cpu, + .sync_single_for_device = dma_direct_sync_single_for_device, + .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu, + .sync_sg_for_device = dma_direct_sync_sg_for_device, }; EXPORT_SYMBOL(dma_direct_ops); diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c index 66fad2301221..6348dc82f428 100644 --- a/arch/microblaze/kernel/exceptions.c +++ b/arch/microblaze/kernel/exceptions.c @@ -119,7 +119,7 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, case MICROBLAZE_DIV_ZERO_EXCEPTION: if (user_mode(regs)) { pr_debug("Divide by zero exception in user mode\n"); - _exception(SIGILL, regs, FPE_INTDIV, addr); + _exception(SIGFPE, regs, FPE_INTDIV, addr); return; } printk(KERN_WARNING "Divide by zero exception " \ diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index dbb812421d8a..95cc295976a7 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -179,6 +179,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, ti->cpu_context.msr = (childregs->msr|MSR_VM); ti->cpu_context.msr &= ~MSR_UMS; /* switch_to to kernel mode */ + ti->cpu_context.msr &= ~MSR_IE; #endif ti->cpu_context.r15 = (unsigned long)ret_from_fork - 8; diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index 6a8e0cc5c57d..043cb58f9c44 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -148,7 +148,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) ret = -1L; if (unlikely(current->audit_context)) - audit_syscall_entry(EM_XILINX_MICROBLAZE, regs->r12, + audit_syscall_entry(EM_MICROBLAZE, regs->r12, regs->r5, regs->r6, regs->r7, regs->r8); diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c index e5550ce4e0eb..af74b1113aab 100644 --- a/arch/microblaze/kernel/timer.c +++ b/arch/microblaze/kernel/timer.c @@ -308,7 +308,8 @@ unsigned long long notrace sched_clock(void) { if (timer_initialized) { struct clocksource *cs = &clocksource_microblaze; - cycle_t cyc = cnt32_to_63(cs->read(NULL)); + + cycle_t cyc = cnt32_to_63(cs->read(NULL)) & LLONG_MAX; return clocksource_cyc2ns(cyc, cs->mult, cs->shift); } return 0; diff --git a/arch/microblaze/lib/Makefile b/arch/microblaze/lib/Makefile index 10c320aa908b..c13067b243c3 100644 --- a/arch/microblaze/lib/Makefile +++ b/arch/microblaze/lib/Makefile @@ -25,5 +25,6 @@ lib-y += lshrdi3.o lib-y += modsi3.o lib-y += muldi3.o lib-y += mulsi3.o +lib-y += ucmpdi2.o lib-y += udivsi3.o lib-y += umodsi3.o diff --git a/arch/microblaze/lib/uaccess_old.S b/arch/microblaze/lib/uaccess_old.S index 5810cec54a7a..f037266cdaf3 100644 --- a/arch/microblaze/lib/uaccess_old.S +++ b/arch/microblaze/lib/uaccess_old.S @@ -10,6 +10,7 @@ #include <linux/errno.h> #include <linux/linkage.h> +#include <asm/page.h> /* * int __strncpy_user(char *to, char *from, int len); @@ -33,8 +34,8 @@ __strncpy_user: * r3 - temp count * r4 - temp val */ + beqid r7,3f addik r3,r7,0 /* temp_count = len */ - beqi r3,3f 1: lbu r4,r6,r0 sb r4,r5,r0 @@ -76,8 +77,8 @@ __strncpy_user: .type __strnlen_user, @function .align 4; __strnlen_user: + beqid r6,3f addik r3,r6,0 - beqi r3,3f 1: lbu r4,r5,r0 beqid r4,2f /* break on NUL */ @@ -102,6 +103,49 @@ __strnlen_user: .section __ex_table,"a" .word 1b,4b +/* Loop unrolling for __copy_tofrom_user */ +#define COPY(offset) \ +1: lwi r4 , r6, 0x0000 + offset; \ +2: lwi r19, r6, 0x0004 + offset; \ +3: lwi r20, r6, 0x0008 + offset; \ +4: lwi r21, r6, 0x000C + offset; \ +5: lwi r22, r6, 0x0010 + offset; \ +6: lwi r23, r6, 0x0014 + offset; \ +7: lwi r24, r6, 0x0018 + offset; \ +8: lwi r25, r6, 0x001C + offset; \ +9: swi r4 , r5, 0x0000 + offset; \ +10: swi r19, r5, 0x0004 + offset; \ +11: swi r20, r5, 0x0008 + offset; \ +12: swi r21, r5, 0x000C + offset; \ +13: swi r22, r5, 0x0010 + offset; \ +14: swi r23, r5, 0x0014 + offset; \ +15: swi r24, r5, 0x0018 + offset; \ +16: swi r25, r5, 0x001C + offset; \ + .section __ex_table,"a"; \ + .word 1b, 0f; \ + .word 2b, 0f; \ + .word 3b, 0f; \ + .word 4b, 0f; \ + .word 5b, 0f; \ + .word 6b, 0f; \ + .word 7b, 0f; \ + .word 8b, 0f; \ + .word 9b, 0f; \ + .word 10b, 0f; \ + .word 11b, 0f; \ + .word 12b, 0f; \ + .word 13b, 0f; \ + .word 14b, 0f; \ + .word 15b, 0f; \ + .word 16b, 0f; \ + .text + +#define COPY_80(offset) \ + COPY(0x00 + offset);\ + COPY(0x20 + offset);\ + COPY(0x40 + offset);\ + COPY(0x60 + offset); + /* * int __copy_tofrom_user(char *to, char *from, int len) * Return: @@ -119,34 +163,79 @@ __copy_tofrom_user: * r7, r3 - count * r4 - tempval */ - beqid r7, 3f /* zero size is not likely */ - andi r3, r7, 0x3 /* filter add count */ - bneid r3, 4f /* if is odd value then byte copying */ + beqid r7, 0f /* zero size is not likely */ or r3, r5, r6 /* find if is any to/from unaligned */ - andi r3, r3, 0x3 /* mask unaligned */ - bneid r3, 1f /* it is unaligned -> then jump */ + or r3, r3, r7 /* find if count is unaligned */ + andi r3, r3, 0x3 /* mask last 3 bits */ + bneid r3, bu1 /* if r3 is not zero then byte copying */ + or r3, r0, r0 + + rsubi r3, r7, PAGE_SIZE /* detect PAGE_SIZE */ + beqid r3, page; or r3, r0, r0 -/* at least one 4 byte copy */ -5: lw r4, r6, r3 -6: sw r4, r5, r3 +w1: lw r4, r6, r3 /* at least one 4 byte copy */ +w2: sw r4, r5, r3 addik r7, r7, -4 - bneid r7, 5b + bneid r7, w1 addik r3, r3, 4 addik r3, r7, 0 rtsd r15, 8 nop -4: or r3, r0, r0 -1: lbu r4,r6,r3 -2: sb r4,r5,r3 + + .section __ex_table,"a" + .word w1, 0f; + .word w2, 0f; + .text + +.align 4 /* Alignment is important to keep icache happy */ +page: /* Create room on stack and save registers for storign values */ + addik r1, r1, -32 + swi r19, r1, 4 + swi r20, r1, 8 + swi r21, r1, 12 + swi r22, r1, 16 + swi r23, r1, 20 + swi r24, r1, 24 + swi r25, r1, 28 +loop: /* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */ + /* Loop unrolling to get performance boost */ + COPY_80(0x000); + COPY_80(0x080); + COPY_80(0x100); + COPY_80(0x180); + /* copy loop */ + addik r6, r6, 0x200 + addik r7, r7, -0x200 + bneid r7, loop + addik r5, r5, 0x200 + /* Restore register content */ + lwi r19, r1, 4 + lwi r20, r1, 8 + lwi r21, r1, 12 + lwi r22, r1, 16 + lwi r23, r1, 20 + lwi r24, r1, 24 + lwi r25, r1, 28 + addik r1, r1, 32 + /* return back */ + addik r3, r7, 0 + rtsd r15, 8 + nop + +.align 4 /* Alignment is important to keep icache happy */ +bu1: lbu r4,r6,r3 +bu2: sb r4,r5,r3 addik r7,r7,-1 - bneid r7,1b + bneid r7,bu1 addik r3,r3,1 /* delay slot */ -3: +0: addik r3,r7,0 rtsd r15,8 nop .size __copy_tofrom_user, . - __copy_tofrom_user .section __ex_table,"a" - .word 1b,3b,2b,3b,5b,3b,6b,3b + .word bu1, 0b; + .word bu2, 0b; + .text diff --git a/arch/microblaze/lib/ucmpdi2.c b/arch/microblaze/lib/ucmpdi2.c new file mode 100644 index 000000000000..63ca105b6713 --- /dev/null +++ b/arch/microblaze/lib/ucmpdi2.c @@ -0,0 +1,20 @@ +#include <linux/module.h> + +#include "libgcc.h" + +word_type __ucmpdi2(unsigned long long a, unsigned long long b) +{ + const DWunion au = {.ll = a}; + const DWunion bu = {.ll = b}; + + if ((unsigned int) au.s.high < (unsigned int) bu.s.high) + return 0; + else if ((unsigned int) au.s.high > (unsigned int) bu.s.high) + return 2; + if ((unsigned int) au.s.low < (unsigned int) bu.s.low) + return 0; + else if ((unsigned int) au.s.low > (unsigned int) bu.s.low) + return 2; + return 1; +} +EXPORT_SYMBOL(__ucmpdi2); |