diff options
Diffstat (limited to 'arch')
446 files changed, 13997 insertions, 9892 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 0eae9df35b88..05d7a8a458d5 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -323,6 +323,17 @@ config HAVE_ARCH_SECCOMP_FILTER results in the system call being skipped immediately. - seccomp syscall wired up + For best performance, an arch should use seccomp_phase1 and + seccomp_phase2 directly. It should call seccomp_phase1 for all + syscalls if TIF_SECCOMP is set, but seccomp_phase1 does not + need to be called from a ptrace-safe context. It must then + call seccomp_phase2 if seccomp_phase1 returns anything other + than SECCOMP_PHASE1_OK or SECCOMP_PHASE1_SKIP. + + As an additional optimization, an arch may provide seccomp_data + directly to seccomp_phase1; this avoids multiple calls + to the syscall_xyz helpers for every syscall. + config SECCOMP_FILTER def_bool y depends on HAVE_ARCH_SECCOMP_FILTER && SECCOMP && NET diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index ed60a1ee1ed3..8f8eafbedd7c 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -17,8 +17,8 @@ #define ATOMIC_INIT(i) { (i) } #define ATOMIC64_INIT(i) { (i) } -#define atomic_read(v) (*(volatile int *)&(v)->counter) -#define atomic64_read(v) (*(volatile long *)&(v)->counter) +#define atomic_read(v) ACCESS_ONCE((v)->counter) +#define atomic64_read(v) ACCESS_ONCE((v)->counter) #define atomic_set(v,i) ((v)->counter = (i)) #define atomic64_set(v,i) ((v)->counter = (i)) @@ -29,145 +29,92 @@ * branch back to restart the operation. */ -static __inline__ void atomic_add(int i, atomic_t * v) -{ - unsigned long temp; - __asm__ __volatile__( - "1: ldl_l %0,%1\n" - " addl %0,%2,%0\n" - " stl_c %0,%1\n" - " beq %0,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (temp), "=m" (v->counter) - :"Ir" (i), "m" (v->counter)); -} - -static __inline__ void atomic64_add(long i, atomic64_t * v) -{ - unsigned long temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%2,%0\n" - " stq_c %0,%1\n" - " beq %0,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (temp), "=m" (v->counter) - :"Ir" (i), "m" (v->counter)); -} - -static __inline__ void atomic_sub(int i, atomic_t * v) -{ - unsigned long temp; - __asm__ __volatile__( - "1: ldl_l %0,%1\n" - " subl %0,%2,%0\n" - " stl_c %0,%1\n" - " beq %0,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (temp), "=m" (v->counter) - :"Ir" (i), "m" (v->counter)); +#define ATOMIC_OP(op) \ +static __inline__ void atomic_##op(int i, atomic_t * v) \ +{ \ + unsigned long temp; \ + __asm__ __volatile__( \ + "1: ldl_l %0,%1\n" \ + " " #op "l %0,%2,%0\n" \ + " stl_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter) \ + :"Ir" (i), "m" (v->counter)); \ +} \ + +#define ATOMIC_OP_RETURN(op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + long temp, result; \ + smp_mb(); \ + __asm__ __volatile__( \ + "1: ldl_l %0,%1\n" \ + " " #op "l %0,%3,%2\n" \ + " " #op "l %0,%3,%0\n" \ + " stl_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ + smp_mb(); \ + return result; \ } -static __inline__ void atomic64_sub(long i, atomic64_t * v) -{ - unsigned long temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " subq %0,%2,%0\n" - " stq_c %0,%1\n" - " beq %0,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (temp), "=m" (v->counter) - :"Ir" (i), "m" (v->counter)); -} - - -/* - * Same as above, but return the result value - */ -static inline int atomic_add_return(int i, atomic_t *v) -{ - long temp, result; - smp_mb(); - __asm__ __volatile__( - "1: ldl_l %0,%1\n" - " addl %0,%3,%2\n" - " addl %0,%3,%0\n" - " stl_c %0,%1\n" - " beq %0,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (temp), "=m" (v->counter), "=&r" (result) - :"Ir" (i), "m" (v->counter) : "memory"); - smp_mb(); - return result; +#define ATOMIC64_OP(op) \ +static __inline__ void atomic64_##op(long i, atomic64_t * v) \ +{ \ + unsigned long temp; \ + __asm__ __volatile__( \ + "1: ldq_l %0,%1\n" \ + " " #op "q %0,%2,%0\n" \ + " stq_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter) \ + :"Ir" (i), "m" (v->counter)); \ +} \ + +#define ATOMIC64_OP_RETURN(op) \ +static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ +{ \ + long temp, result; \ + smp_mb(); \ + __asm__ __volatile__( \ + "1: ldq_l %0,%1\n" \ + " " #op "q %0,%3,%2\n" \ + " " #op "q %0,%3,%0\n" \ + " stq_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ + smp_mb(); \ + return result; \ } -static __inline__ long atomic64_add_return(long i, atomic64_t * v) -{ - long temp, result; - smp_mb(); - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%3,%2\n" - " addq %0,%3,%0\n" - " stq_c %0,%1\n" - " beq %0,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (temp), "=m" (v->counter), "=&r" (result) - :"Ir" (i), "m" (v->counter) : "memory"); - smp_mb(); - return result; -} +#define ATOMIC_OPS(opg) \ + ATOMIC_OP(opg) \ + ATOMIC_OP_RETURN(opg) \ + ATOMIC64_OP(opg) \ + ATOMIC64_OP_RETURN(opg) -static __inline__ long atomic_sub_return(int i, atomic_t * v) -{ - long temp, result; - smp_mb(); - __asm__ __volatile__( - "1: ldl_l %0,%1\n" - " subl %0,%3,%2\n" - " subl %0,%3,%0\n" - " stl_c %0,%1\n" - " beq %0,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (temp), "=m" (v->counter), "=&r" (result) - :"Ir" (i), "m" (v->counter) : "memory"); - smp_mb(); - return result; -} +ATOMIC_OPS(add) +ATOMIC_OPS(sub) -static __inline__ long atomic64_sub_return(long i, atomic64_t * v) -{ - long temp, result; - smp_mb(); - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " subq %0,%3,%2\n" - " subq %0,%3,%0\n" - " stq_c %0,%1\n" - " beq %0,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (temp), "=m" (v->counter), "=&r" (result) - :"Ir" (i), "m" (v->counter) : "memory"); - smp_mb(); - return result; -} +#undef ATOMIC_OPS +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP #define atomic64_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new)) #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 1402fcc11c2c..f9c732e18284 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -446,7 +446,8 @@ struct procfs_args { * unhappy with OSF UFS. [CHECKME] */ static int -osf_ufs_mount(const char *dirname, struct ufs_args __user *args, int flags) +osf_ufs_mount(const char __user *dirname, + struct ufs_args __user *args, int flags) { int retval; struct cdfs_args tmp; @@ -466,7 +467,8 @@ osf_ufs_mount(const char *dirname, struct ufs_args __user *args, int flags) } static int -osf_cdfs_mount(const char *dirname, struct cdfs_args __user *args, int flags) +osf_cdfs_mount(const char __user *dirname, + struct cdfs_args __user *args, int flags) { int retval; struct cdfs_args tmp; @@ -486,7 +488,8 @@ osf_cdfs_mount(const char *dirname, struct cdfs_args __user *args, int flags) } static int -osf_procfs_mount(const char *dirname, struct procfs_args __user *args, int flags) +osf_procfs_mount(const char __user *dirname, + struct procfs_args __user *args, int flags) { struct procfs_args tmp; @@ -500,28 +503,22 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, const char __user *, path, int, flag, void __user *, data) { int retval; - struct filename *name; - name = getname(path); - retval = PTR_ERR(name); - if (IS_ERR(name)) - goto out; switch (typenr) { case 1: - retval = osf_ufs_mount(name->name, data, flag); + retval = osf_ufs_mount(path, data, flag); break; case 6: - retval = osf_cdfs_mount(name->name, data, flag); + retval = osf_cdfs_mount(path, data, flag); break; case 9: - retval = osf_procfs_mount(name->name, data, flag); + retval = osf_procfs_mount(path, data, flag); break; default: retval = -EINVAL; printk("osf_mount(%ld, %x)\n", typenr, flag); } - putname(name); - out: + return retval; } diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 83f03ca6caf6..173f303a868f 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -25,79 +25,36 @@ #define atomic_set(v, i) (((v)->counter) = (i)) -static inline void atomic_add(int i, atomic_t *v) -{ - unsigned int temp; - - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " add %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) /* Early clobber, to prevent reg reuse */ - : "r"(&v->counter), "ir"(i) - : "cc"); -} - -static inline void atomic_sub(int i, atomic_t *v) -{ - unsigned int temp; - - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " sub %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) - : "r"(&v->counter), "ir"(i) - : "cc"); -} - -/* add and also return the new value */ -static inline int atomic_add_return(int i, atomic_t *v) -{ - unsigned int temp; - - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " add %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) - : "r"(&v->counter), "ir"(i) - : "cc"); - - return temp; -} - -static inline int atomic_sub_return(int i, atomic_t *v) -{ - unsigned int temp; - - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " sub %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) - : "r"(&v->counter), "ir"(i) - : "cc"); - - return temp; -} - -static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) -{ - unsigned int temp; - - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " bic %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) - : "r"(addr), "ir"(mask) - : "cc"); +#define ATOMIC_OP(op, c_op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned int temp; \ + \ + __asm__ __volatile__( \ + "1: llock %0, [%1] \n" \ + " " #asm_op " %0, %0, %2 \n" \ + " scond %0, [%1] \n" \ + " bnz 1b \n" \ + : "=&r"(temp) /* Early clobber, to prevent reg reuse */ \ + : "r"(&v->counter), "ir"(i) \ + : "cc"); \ +} \ + +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned int temp; \ + \ + __asm__ __volatile__( \ + "1: llock %0, [%1] \n" \ + " " #asm_op " %0, %0, %2 \n" \ + " scond %0, [%1] \n" \ + " bnz 1b \n" \ + : "=&r"(temp) \ + : "r"(&v->counter), "ir"(i) \ + : "cc"); \ + \ + return temp; \ } #else /* !CONFIG_ARC_HAS_LLSC */ @@ -126,6 +83,7 @@ static inline void atomic_set(atomic_t *v, int i) v->counter = i; atomic_ops_unlock(flags); } + #endif /* @@ -133,62 +91,46 @@ static inline void atomic_set(atomic_t *v, int i) * Locking would change to irq-disabling only (UP) and spinlocks (SMP) */ -static inline void atomic_add(int i, atomic_t *v) -{ - unsigned long flags; - - atomic_ops_lock(flags); - v->counter += i; - atomic_ops_unlock(flags); +#define ATOMIC_OP(op, c_op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + \ + atomic_ops_lock(flags); \ + v->counter c_op i; \ + atomic_ops_unlock(flags); \ } -static inline void atomic_sub(int i, atomic_t *v) -{ - unsigned long flags; - - atomic_ops_lock(flags); - v->counter -= i; - atomic_ops_unlock(flags); +#define ATOMIC_OP_RETURN(op, c_op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + unsigned long temp; \ + \ + atomic_ops_lock(flags); \ + temp = v->counter; \ + temp c_op i; \ + v->counter = temp; \ + atomic_ops_unlock(flags); \ + \ + return temp; \ } -static inline int atomic_add_return(int i, atomic_t *v) -{ - unsigned long flags; - unsigned long temp; - - atomic_ops_lock(flags); - temp = v->counter; - temp += i; - v->counter = temp; - atomic_ops_unlock(flags); - - return temp; -} - -static inline int atomic_sub_return(int i, atomic_t *v) -{ - unsigned long flags; - unsigned long temp; - - atomic_ops_lock(flags); - temp = v->counter; - temp -= i; - v->counter = temp; - atomic_ops_unlock(flags); +#endif /* !CONFIG_ARC_HAS_LLSC */ - return temp; -} +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_OP_RETURN(op, c_op, asm_op) -static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) -{ - unsigned long flags; +ATOMIC_OPS(add, +=, add) +ATOMIC_OPS(sub, -=, sub) +ATOMIC_OP(and, &=, and) - atomic_ops_lock(flags); - *addr &= ~mask; - atomic_ops_unlock(flags); -} +#define atomic_clear_mask(mask, v) atomic_and(~(mask), (v)) -#endif /* !CONFIG_ARC_HAS_LLSC */ +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP /** * __atomic_add_unless - add unless the number is a given value diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 18f392f8b744..89c4b5ccc68d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1779,7 +1779,7 @@ config XEN_DOM0 depends on XEN config XEN - bool "Xen guest support on ARM (EXPERIMENTAL)" + bool "Xen guest support on ARM" depends on ARM && AEABI && OF depends on CPU_V7 && !CPU_V6 depends on !GENERIC_ATOMIC64 diff --git a/arch/arm/boot/dts/exynos5420-arndale-octa.dts b/arch/arm/boot/dts/exynos5420-arndale-octa.dts index 70a559cf1a3d..4f2df61c1cfc 100644 --- a/arch/arm/boot/dts/exynos5420-arndale-octa.dts +++ b/arch/arm/boot/dts/exynos5420-arndale-octa.dts @@ -69,7 +69,8 @@ samsung,dw-mshc-ddr-timing = <1 2>; pinctrl-names = "default"; pinctrl-0 = <&sd2_clk &sd2_cmd &sd2_cd &sd2_bus4>; - vmmc-supply = <&ldo10_reg>; + vmmc-supply = <&ldo19_reg>; + vqmmc-supply = <&ldo13_reg>; bus-width = <4>; cap-sd-highspeed; }; diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 3040359094d9..e22c11970b7b 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -27,7 +27,7 @@ * strex/ldrex monitor on some implementations. The reason we can use it for * atomic_set() is the clrex or dummy strex done on every exception return. */ -#define atomic_read(v) (*(volatile int *)&(v)->counter) +#define atomic_read(v) ACCESS_ONCE((v)->counter) #define atomic_set(v,i) (((v)->counter) = (i)) #if __LINUX_ARM_ARCH__ >= 6 @@ -37,84 +37,47 @@ * store exclusive to ensure that these are atomic. We may loop * to ensure that the update happens. */ -static inline void atomic_add(int i, atomic_t *v) -{ - unsigned long tmp; - int result; - - prefetchw(&v->counter); - __asm__ __volatile__("@ atomic_add\n" -"1: ldrex %0, [%3]\n" -" add %0, %0, %4\n" -" strex %1, %0, [%3]\n" -" teq %1, #0\n" -" bne 1b" - : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) - : "r" (&v->counter), "Ir" (i) - : "cc"); -} -static inline int atomic_add_return(int i, atomic_t *v) -{ - unsigned long tmp; - int result; - - smp_mb(); - prefetchw(&v->counter); - - __asm__ __volatile__("@ atomic_add_return\n" -"1: ldrex %0, [%3]\n" -" add %0, %0, %4\n" -" strex %1, %0, [%3]\n" -" teq %1, #0\n" -" bne 1b" - : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) - : "r" (&v->counter), "Ir" (i) - : "cc"); - - smp_mb(); - - return result; -} - -static inline void atomic_sub(int i, atomic_t *v) -{ - unsigned long tmp; - int result; - - prefetchw(&v->counter); - __asm__ __volatile__("@ atomic_sub\n" -"1: ldrex %0, [%3]\n" -" sub %0, %0, %4\n" -" strex %1, %0, [%3]\n" -" teq %1, #0\n" -" bne 1b" - : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) - : "r" (&v->counter), "Ir" (i) - : "cc"); -} - -static inline int atomic_sub_return(int i, atomic_t *v) -{ - unsigned long tmp; - int result; - - smp_mb(); - prefetchw(&v->counter); - - __asm__ __volatile__("@ atomic_sub_return\n" -"1: ldrex %0, [%3]\n" -" sub %0, %0, %4\n" -" strex %1, %0, [%3]\n" -" teq %1, #0\n" -" bne 1b" - : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) - : "r" (&v->counter), "Ir" (i) - : "cc"); - - smp_mb(); - - return result; +#define ATOMIC_OP(op, c_op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned long tmp; \ + int result; \ + \ + prefetchw(&v->counter); \ + __asm__ __volatile__("@ atomic_" #op "\n" \ +"1: ldrex %0, [%3]\n" \ +" " #asm_op " %0, %0, %4\n" \ +" strex %1, %0, [%3]\n" \ +" teq %1, #0\n" \ +" bne 1b" \ + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) \ + : "r" (&v->counter), "Ir" (i) \ + : "cc"); \ +} \ + +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned long tmp; \ + int result; \ + \ + smp_mb(); \ + prefetchw(&v->counter); \ + \ + __asm__ __volatile__("@ atomic_" #op "_return\n" \ +"1: ldrex %0, [%3]\n" \ +" " #asm_op " %0, %0, %4\n" \ +" strex %1, %0, [%3]\n" \ +" teq %1, #0\n" \ +" bne 1b" \ + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) \ + : "r" (&v->counter), "Ir" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return result; \ } static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) @@ -174,33 +137,29 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) #error SMP not supported on pre-ARMv6 CPUs #endif -static inline int atomic_add_return(int i, atomic_t *v) -{ - unsigned long flags; - int val; - - raw_local_irq_save(flags); - val = v->counter; - v->counter = val += i; - raw_local_irq_restore(flags); - - return val; -} -#define atomic_add(i, v) (void) atomic_add_return(i, v) - -static inline int atomic_sub_return(int i, atomic_t *v) -{ - unsigned long flags; - int val; - - raw_local_irq_save(flags); - val = v->counter; - v->counter = val -= i; - raw_local_irq_restore(flags); - - return val; +#define ATOMIC_OP(op, c_op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ +} \ + +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int val; \ + \ + raw_local_irq_save(flags); \ + v->counter c_op i; \ + val = v->counter; \ + raw_local_irq_restore(flags); \ + \ + return val; \ } -#define atomic_sub(i, v) (void) atomic_sub_return(i, v) static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { @@ -228,6 +187,17 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) #endif /* __LINUX_ARM_ARCH__ */ +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_OP_RETURN(op, c_op, asm_op) + +ATOMIC_OPS(add, +=, add) +ATOMIC_OPS(sub, -=, sub) + +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) #define atomic_inc(v) atomic_add(1, v) @@ -300,89 +270,60 @@ static inline void atomic64_set(atomic64_t *v, long long i) } #endif -static inline void atomic64_add(long long i, atomic64_t *v) -{ - long long result; - unsigned long tmp; - - prefetchw(&v->counter); - __asm__ __volatile__("@ atomic64_add\n" -"1: ldrexd %0, %H0, [%3]\n" -" adds %Q0, %Q0, %Q4\n" -" adc %R0, %R0, %R4\n" -" strexd %1, %0, %H0, [%3]\n" -" teq %1, #0\n" -" bne 1b" - : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) - : "r" (&v->counter), "r" (i) - : "cc"); -} - -static inline long long atomic64_add_return(long long i, atomic64_t *v) -{ - long long result; - unsigned long tmp; - - smp_mb(); - prefetchw(&v->counter); - - __asm__ __volatile__("@ atomic64_add_return\n" -"1: ldrexd %0, %H0, [%3]\n" -" adds %Q0, %Q0, %Q4\n" -" adc %R0, %R0, %R4\n" -" strexd %1, %0, %H0, [%3]\n" -" teq %1, #0\n" -" bne 1b" - : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) - : "r" (&v->counter), "r" (i) - : "cc"); - - smp_mb(); - - return result; -} - -static inline void atomic64_sub(long long i, atomic64_t *v) -{ - long long result; - unsigned long tmp; - - prefetchw(&v->counter); - __asm__ __volatile__("@ atomic64_sub\n" -"1: ldrexd %0, %H0, [%3]\n" -" subs %Q0, %Q0, %Q4\n" -" sbc %R0, %R0, %R4\n" -" strexd %1, %0, %H0, [%3]\n" -" teq %1, #0\n" -" bne 1b" - : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) - : "r" (&v->counter), "r" (i) - : "cc"); +#define ATOMIC64_OP(op, op1, op2) \ +static inline void atomic64_##op(long long i, atomic64_t *v) \ +{ \ + long long result; \ + unsigned long tmp; \ + \ + prefetchw(&v->counter); \ + __asm__ __volatile__("@ atomic64_" #op "\n" \ +"1: ldrexd %0, %H0, [%3]\n" \ +" " #op1 " %Q0, %Q0, %Q4\n" \ +" " #op2 " %R0, %R0, %R4\n" \ +" strexd %1, %0, %H0, [%3]\n" \ +" teq %1, #0\n" \ +" bne 1b" \ + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) \ + : "r" (&v->counter), "r" (i) \ + : "cc"); \ +} \ + +#define ATOMIC64_OP_RETURN(op, op1, op2) \ +static inline long long atomic64_##op##_return(long long i, atomic64_t *v) \ +{ \ + long long result; \ + unsigned long tmp; \ + \ + smp_mb(); \ + prefetchw(&v->counter); \ + \ + __asm__ __volatile__("@ atomic64_" #op "_return\n" \ +"1: ldrexd %0, %H0, [%3]\n" \ +" " #op1 " %Q0, %Q0, %Q4\n" \ +" " #op2 " %R0, %R0, %R4\n" \ +" strexd %1, %0, %H0, [%3]\n" \ +" teq %1, #0\n" \ +" bne 1b" \ + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) \ + : "r" (&v->counter), "r" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return result; \ } -static inline long long atomic64_sub_return(long long i, atomic64_t *v) -{ - long long result; - unsigned long tmp; - - smp_mb(); - prefetchw(&v->counter); - - __asm__ __volatile__("@ atomic64_sub_return\n" -"1: ldrexd %0, %H0, [%3]\n" -" subs %Q0, %Q0, %Q4\n" -" sbc %R0, %R0, %R4\n" -" strexd %1, %0, %H0, [%3]\n" -" teq %1, #0\n" -" bne 1b" - : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) - : "r" (&v->counter), "r" (i) - : "cc"); +#define ATOMIC64_OPS(op, op1, op2) \ + ATOMIC64_OP(op, op1, op2) \ + ATOMIC64_OP_RETURN(op, op1, op2) - smp_mb(); +ATOMIC64_OPS(add, adds, adc) +ATOMIC64_OPS(sub, subs, sbc) - return result; -} +#undef ATOMIC64_OPS +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP static inline long long atomic64_cmpxchg(atomic64_t *ptr, long long old, long long new) diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index c45b61a4b4a5..85738b200023 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -265,22 +265,6 @@ extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs); -static inline void *dma_alloc_writecombine(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) -{ - DEFINE_DMA_ATTRS(attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - return dma_alloc_attrs(dev, size, dma_handle, flag, &attrs); -} - -static inline void dma_free_writecombine(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle) -{ - DEFINE_DMA_ATTRS(attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - return dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs); -} - /* * This can be called during early boot to increase the size of the atomic * coherent DMA pool above the default value of 256KiB. It must be called diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index a0a691d1cbee..fe972a2f3df3 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -114,18 +114,13 @@ void soft_restart(unsigned long addr) BUG(); } -static void null_restart(enum reboot_mode reboot_mode, const char *cmd) -{ -} - /* * Function pointers to optional machine specific functions */ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); -void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd) = null_restart; -EXPORT_SYMBOL_GPL(arm_pm_restart); +void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); /* * This is our default idle handler. @@ -230,7 +225,10 @@ void machine_restart(char *cmd) local_irq_disable(); smp_send_stop(); - arm_pm_restart(reboot_mode, cmd); + if (arm_pm_restart) + arm_pm_restart(reboot_mode, cmd); + else + do_kernel_restart(cmd); /* Give a grace period for failure to restart of 1s */ mdelay(1000); diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 0c27ed6f3f23..5e772a21ab97 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -933,8 +933,13 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) current_thread_info()->syscall = scno; /* Do the secure computing check first; failures should be fast. */ - if (secure_computing(scno) == -1) +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER + if (secure_computing() == -1) return -1; +#else + /* XXX: remove this once OABI gets fixed */ + secure_computing_strict(scno); +#endif if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index e35d880f9773..89cfdd6e50cb 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -42,7 +42,7 @@ */ static DEFINE_PER_CPU(unsigned long, cpu_scale); -unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu) +unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) { return per_cpu(cpu_scale, cpu); } @@ -166,7 +166,7 @@ static void update_cpu_capacity(unsigned int cpu) set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity); printk(KERN_INFO "CPU%u: update cpu_capacity %lu\n", - cpu, arch_scale_freq_capacity(NULL, cpu)); + cpu, arch_scale_cpu_capacity(NULL, cpu)); } #else diff --git a/arch/arm/mach-shmobile/board-koelsch.c b/arch/arm/mach-shmobile/board-koelsch.c index b7d5bc7659cd..126a8b4ec491 100644 --- a/arch/arm/mach-shmobile/board-koelsch.c +++ b/arch/arm/mach-shmobile/board-koelsch.c @@ -331,7 +331,6 @@ SDHI_REGULATOR(2, RCAR_GP_PIN(7, 19), RCAR_GP_PIN(2, 26)); static struct sh_mobile_sdhi_info sdhi0_info __initdata = { .tmio_caps = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD, - .tmio_caps2 = MMC_CAP2_NO_MULTI_READ, .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT, }; @@ -344,7 +343,6 @@ static struct resource sdhi0_resources[] __initdata = { static struct sh_mobile_sdhi_info sdhi1_info __initdata = { .tmio_caps = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD, - .tmio_caps2 = MMC_CAP2_NO_MULTI_READ, .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT, }; @@ -357,7 +355,6 @@ static struct resource sdhi1_resources[] __initdata = { static struct sh_mobile_sdhi_info sdhi2_info __initdata = { .tmio_caps = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD, - .tmio_caps2 = MMC_CAP2_NO_MULTI_READ, .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE, }; diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c index e1d8215da0b0..f5a98e2942b3 100644 --- a/arch/arm/mach-shmobile/board-lager.c +++ b/arch/arm/mach-shmobile/board-lager.c @@ -630,7 +630,6 @@ static void __init lager_add_rsnd_device(void) static struct sh_mobile_sdhi_info sdhi0_info __initdata = { .tmio_caps = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD, - .tmio_caps2 = MMC_CAP2_NO_MULTI_READ, .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE, }; @@ -644,7 +643,6 @@ static struct resource sdhi0_resources[] __initdata = { static struct sh_mobile_sdhi_info sdhi2_info __initdata = { .tmio_caps = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD, - .tmio_caps2 = MMC_CAP2_NO_MULTI_READ, .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE, }; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c49ca4c738bb..ac9afde76dea 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -349,7 +349,7 @@ config XEN_DOM0 depends on XEN config XEN - bool "Xen guest support on ARM64 (EXPERIMENTAL)" + bool "Xen guest support on ARM64" depends on ARM64 && OF select SWIOTLB_XEN help diff --git a/arch/arm64/boot/dts/apm-mustang.dts b/arch/arm64/boot/dts/apm-mustang.dts index f64900052f4e..8eb6d94c7851 100644 --- a/arch/arm64/boot/dts/apm-mustang.dts +++ b/arch/arm64/boot/dts/apm-mustang.dts @@ -40,3 +40,7 @@ &menet { status = "ok"; }; + +&xgenet { + status = "ok"; +}; diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi index 4f6d04d52cca..87d3205e98d5 100644 --- a/arch/arm64/boot/dts/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -176,6 +176,16 @@ clock-output-names = "menetclk"; }; + xge0clk: xge0clk@1f61c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f61c000 0x0 0x1000>; + reg-names = "csr-reg"; + csr-mask = <0x3>; + clock-output-names = "xge0clk"; + }; + sataphy1clk: sataphy1clk@1f21c000 { compatible = "apm,xgene-device-clock"; #clock-cells = <1>; @@ -585,7 +595,8 @@ interrupts = <0x0 0x3c 0x4>; dma-coherent; clocks = <&menetclk 0>; - local-mac-address = [00 01 73 00 00 01]; + /* mac address will be overwritten by the bootloader */ + local-mac-address = [00 00 00 00 00 00]; phy-connection-type = "rgmii"; phy-handle = <&menetphy>; mdio { @@ -600,12 +611,26 @@ }; }; + xgenet: ethernet@1f610000 { + compatible = "apm,xgene-enet"; + status = "disabled"; + reg = <0x0 0x1f610000 0x0 0xd100>, + <0x0 0x1f600000 0x0 0X400>, + <0x0 0x18000000 0x0 0X200>; + reg-names = "enet_csr", "ring_csr", "ring_cmd"; + interrupts = <0x0 0x60 0x4>; + dma-coherent; + clocks = <&xge0clk 0>; + /* mac address will be overwritten by the bootloader */ + local-mac-address = [00 00 00 00 00 00]; + phy-connection-type = "xgmii"; + }; + rng: rng@10520000 { compatible = "apm,xgene-rng"; reg = <0x0 0x10520000 0x0 0x100>; interrupts = <0x0 0x41 0x4>; clocks = <&rngpkaclk 0>; }; - }; }; diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 65f1569ac96e..7047051ded40 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -35,7 +35,7 @@ * strex/ldrex monitor on some implementations. The reason we can use it for * atomic_set() is the clrex or dummy strex done on every exception return. */ -#define atomic_read(v) (*(volatile int *)&(v)->counter) +#define atomic_read(v) ACCESS_ONCE((v)->counter) #define atomic_set(v,i) (((v)->counter) = (i)) /* @@ -43,69 +43,51 @@ * store exclusive to ensure that these are atomic. We may loop * to ensure that the update happens. */ -static inline void atomic_add(int i, atomic_t *v) -{ - unsigned long tmp; - int result; - - asm volatile("// atomic_add\n" -"1: ldxr %w0, %2\n" -" add %w0, %w0, %w3\n" -" stxr %w1, %w0, %2\n" -" cbnz %w1, 1b" - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i)); -} - -static inline int atomic_add_return(int i, atomic_t *v) -{ - unsigned long tmp; - int result; - - asm volatile("// atomic_add_return\n" -"1: ldxr %w0, %2\n" -" add %w0, %w0, %w3\n" -" stlxr %w1, %w0, %2\n" -" cbnz %w1, 1b" - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "memory"); - - smp_mb(); - return result; -} - -static inline void atomic_sub(int i, atomic_t *v) -{ - unsigned long tmp; - int result; - asm volatile("// atomic_sub\n" -"1: ldxr %w0, %2\n" -" sub %w0, %w0, %w3\n" -" stxr %w1, %w0, %2\n" -" cbnz %w1, 1b" - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i)); +#define ATOMIC_OP(op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned long tmp; \ + int result; \ + \ + asm volatile("// atomic_" #op "\n" \ +"1: ldxr %w0, %2\n" \ +" " #asm_op " %w0, %w0, %w3\n" \ +" stxr %w1, %w0, %2\n" \ +" cbnz %w1, 1b" \ + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i)); \ +} \ + +#define ATOMIC_OP_RETURN(op, asm_op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned long tmp; \ + int result; \ + \ + asm volatile("// atomic_" #op "_return\n" \ +"1: ldxr %w0, %2\n" \ +" " #asm_op " %w0, %w0, %w3\n" \ +" stlxr %w1, %w0, %2\n" \ +" cbnz %w1, 1b" \ + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : "memory"); \ + \ + smp_mb(); \ + return result; \ } -static inline int atomic_sub_return(int i, atomic_t *v) -{ - unsigned long tmp; - int result; +#define ATOMIC_OPS(op, asm_op) \ + ATOMIC_OP(op, asm_op) \ + ATOMIC_OP_RETURN(op, asm_op) - asm volatile("// atomic_sub_return\n" -"1: ldxr %w0, %2\n" -" sub %w0, %w0, %w3\n" -" stlxr %w1, %w0, %2\n" -" cbnz %w1, 1b" - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "memory"); +ATOMIC_OPS(add, add) +ATOMIC_OPS(sub, sub) - smp_mb(); - return result; -} +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) { @@ -157,72 +139,53 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) */ #define ATOMIC64_INIT(i) { (i) } -#define atomic64_read(v) (*(volatile long *)&(v)->counter) +#define atomic64_read(v) ACCESS_ONCE((v)->counter) #define atomic64_set(v,i) (((v)->counter) = (i)) -static inline void atomic64_add(u64 i, atomic64_t *v) -{ - long result; - unsigned long tmp; - - asm volatile("// atomic64_add\n" -"1: ldxr %0, %2\n" -" add %0, %0, %3\n" -" stxr %w1, %0, %2\n" -" cbnz %w1, 1b" - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i)); +#define ATOMIC64_OP(op, asm_op) \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + long result; \ + unsigned long tmp; \ + \ + asm volatile("// atomic64_" #op "\n" \ +"1: ldxr %0, %2\n" \ +" " #asm_op " %0, %0, %3\n" \ +" stxr %w1, %0, %2\n" \ +" cbnz %w1, 1b" \ + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i)); \ +} \ + +#define ATOMIC64_OP_RETURN(op, asm_op) \ +static inline long atomic64_##op##_return(long i, atomic64_t *v) \ +{ \ + long result; \ + unsigned long tmp; \ + \ + asm volatile("// atomic64_" #op "_return\n" \ +"1: ldxr %0, %2\n" \ +" " #asm_op " %0, %0, %3\n" \ +" stlxr %w1, %0, %2\n" \ +" cbnz %w1, 1b" \ + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : "memory"); \ + \ + smp_mb(); \ + return result; \ } -static inline long atomic64_add_return(long i, atomic64_t *v) -{ - long result; - unsigned long tmp; +#define ATOMIC64_OPS(op, asm_op) \ + ATOMIC64_OP(op, asm_op) \ + ATOMIC64_OP_RETURN(op, asm_op) - asm volatile("// atomic64_add_return\n" -"1: ldxr %0, %2\n" -" add %0, %0, %3\n" -" stlxr %w1, %0, %2\n" -" cbnz %w1, 1b" - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "memory"); +ATOMIC64_OPS(add, add) +ATOMIC64_OPS(sub, sub) - smp_mb(); - return result; -} - -static inline void atomic64_sub(u64 i, atomic64_t *v) -{ - long result; - unsigned long tmp; - - asm volatile("// atomic64_sub\n" -"1: ldxr %0, %2\n" -" sub %0, %0, %3\n" -" stxr %w1, %0, %2\n" -" cbnz %w1, 1b" - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i)); -} - -static inline long atomic64_sub_return(long i, atomic64_t *v) -{ - long result; - unsigned long tmp; - - asm volatile("// atomic64_sub_return\n" -"1: ldxr %0, %2\n" -" sub %0, %0, %3\n" -" stlxr %w1, %0, %2\n" -" cbnz %w1, 1b" - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "memory"); - - smp_mb(); - return result; -} +#undef ATOMIC64_OPS +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) { diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 89f41f7d27dd..c3065dbc4fa2 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -72,7 +72,6 @@ void (*pm_power_off)(void); EXPORT_SYMBOL_GPL(pm_power_off); void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); -EXPORT_SYMBOL_GPL(arm_pm_restart); /* * This is our default idle handler. @@ -154,6 +153,8 @@ void machine_restart(char *cmd) /* Now call the architecture specific reboot code. */ if (arm_pm_restart) arm_pm_restart(reboot_mode, cmd); + else + do_kernel_restart(cmd); /* * Whoops - the architecture was unable to reboot. diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index 0780f3f2415b..2d07ce1c5327 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -19,33 +19,46 @@ #define ATOMIC_INIT(i) { (i) } -#define atomic_read(v) (*(volatile int *)&(v)->counter) +#define atomic_read(v) ACCESS_ONCE((v)->counter) #define atomic_set(v, i) (((v)->counter) = i) +#define ATOMIC_OP_RETURN(op, asm_op, asm_con) \ +static inline int __atomic_##op##_return(int i, atomic_t *v) \ +{ \ + int result; \ + \ + asm volatile( \ + "/* atomic_" #op "_return */\n" \ + "1: ssrf 5\n" \ + " ld.w %0, %2\n" \ + " " #asm_op " %0, %3\n" \ + " stcond %1, %0\n" \ + " brne 1b" \ + : "=&r" (result), "=o" (v->counter) \ + : "m" (v->counter), #asm_con (i) \ + : "cc"); \ + \ + return result; \ +} + +ATOMIC_OP_RETURN(sub, sub, rKs21) +ATOMIC_OP_RETURN(add, add, r) + +#undef ATOMIC_OP_RETURN + /* - * atomic_sub_return - subtract the atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic_t + * Probably found the reason why we want to use sub with the signed 21-bit + * limit, it uses one less register than the add instruction that can add up to + * 32-bit values. * - * Atomically subtracts @i from @v. Returns the resulting value. + * Both instructions are 32-bit, to use a 16-bit instruction the immediate is + * very small; 4 bit. + * + * sub 32-bit, type IV, takes a register and subtracts a 21-bit immediate. + * add 32-bit, type II, adds two register values together. */ -static inline int atomic_sub_return(int i, atomic_t *v) -{ - int result; - - asm volatile( - "/* atomic_sub_return */\n" - "1: ssrf 5\n" - " ld.w %0, %2\n" - " sub %0, %3\n" - " stcond %1, %0\n" - " brne 1b" - : "=&r"(result), "=o"(v->counter) - : "m"(v->counter), "rKs21"(i) - : "cc"); - - return result; -} +#define IS_21BIT_CONST(i) \ + (__builtin_constant_p(i) && ((i) >= -1048575) && ((i) <= 1048576)) /* * atomic_add_return - add integer to atomic variable @@ -56,51 +69,25 @@ static inline int atomic_sub_return(int i, atomic_t *v) */ static inline int atomic_add_return(int i, atomic_t *v) { - int result; - - if (__builtin_constant_p(i) && (i >= -1048575) && (i <= 1048576)) - result = atomic_sub_return(-i, v); - else - asm volatile( - "/* atomic_add_return */\n" - "1: ssrf 5\n" - " ld.w %0, %1\n" - " add %0, %3\n" - " stcond %2, %0\n" - " brne 1b" - : "=&r"(result), "=o"(v->counter) - : "m"(v->counter), "r"(i) - : "cc", "memory"); + if (IS_21BIT_CONST(i)) + return __atomic_sub_return(-i, v); - return result; + return __atomic_add_return(i, v); } /* - * atomic_sub_unless - sub unless the number is a given value + * atomic_sub_return - subtract the atomic variable + * @i: integer value to subtract * @v: pointer of type atomic_t - * @a: the amount to subtract from v... - * @u: ...unless v is equal to u. * - * Atomically subtract @a from @v, so long as it was not @u. - * Returns the old value of @v. -*/ -static inline void atomic_sub_unless(atomic_t *v, int a, int u) + * Atomically subtracts @i from @v. Returns the resulting value. + */ +static inline int atomic_sub_return(int i, atomic_t *v) { - int tmp; + if (IS_21BIT_CONST(i)) + return __atomic_sub_return(i, v); - asm volatile( - "/* atomic_sub_unless */\n" - "1: ssrf 5\n" - " ld.w %0, %2\n" - " cp.w %0, %4\n" - " breq 1f\n" - " sub %0, %3\n" - " stcond %1, %0\n" - " brne 1b\n" - "1:" - : "=&r"(tmp), "=o"(v->counter) - : "m"(v->counter), "rKs21"(a), "rKs21"(u) - : "cc", "memory"); + return __atomic_add_return(-i, v); } /* @@ -116,9 +103,21 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) { int tmp, old = atomic_read(v); - if (__builtin_constant_p(a) && (a >= -1048575) && (a <= 1048576)) - atomic_sub_unless(v, -a, u); - else { + if (IS_21BIT_CONST(a)) { + asm volatile( + "/* __atomic_sub_unless */\n" + "1: ssrf 5\n" + " ld.w %0, %2\n" + " cp.w %0, %4\n" + " breq 1f\n" + " sub %0, %3\n" + " stcond %1, %0\n" + " brne 1b\n" + "1:" + : "=&r"(tmp), "=o"(v->counter) + : "m"(v->counter), "rKs21"(-a), "rKs21"(u) + : "cc", "memory"); + } else { asm volatile( "/* __atomic_add_unless */\n" "1: ssrf 5\n" @@ -137,6 +136,8 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) return old; } +#undef IS_21BIT_CONST + /* * atomic_sub_if_positive - conditionally subtract integer from atomic variable * @i: integer value to subtract diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c index 29eb02ab3f25..0f3983241e60 100644 --- a/arch/cris/arch-v10/drivers/sync_serial.c +++ b/arch/cris/arch-v10/drivers/sync_serial.c @@ -1086,7 +1086,6 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, } local_irq_restore(flags); schedule(); - set_current_state(TASK_RUNNING); remove_wait_queue(&port->out_wait_q, &wait); if (signal_pending(current)) return -EINTR; diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c index bbb806b68838..5a149134cfb5 100644 --- a/arch/cris/arch-v32/drivers/sync_serial.c +++ b/arch/cris/arch-v32/drivers/sync_serial.c @@ -1089,7 +1089,6 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, } schedule(); - set_current_state(TASK_RUNNING); remove_wait_queue(&port->out_wait_q, &wait); if (signal_pending(current)) diff --git a/arch/cris/include/asm/atomic.h b/arch/cris/include/asm/atomic.h index aa429baebaf9..279766a70664 100644 --- a/arch/cris/include/asm/atomic.h +++ b/arch/cris/include/asm/atomic.h @@ -17,48 +17,41 @@ #define ATOMIC_INIT(i) { (i) } -#define atomic_read(v) (*(volatile int *)&(v)->counter) +#define atomic_read(v) ACCESS_ONCE((v)->counter) #define atomic_set(v,i) (((v)->counter) = (i)) /* These should be written in asm but we do it in C for now. */ -static inline void atomic_add(int i, volatile atomic_t *v) -{ - unsigned long flags; - cris_atomic_save(v, flags); - v->counter += i; - cris_atomic_restore(v, flags); +#define ATOMIC_OP(op, c_op) \ +static inline void atomic_##op(int i, volatile atomic_t *v) \ +{ \ + unsigned long flags; \ + cris_atomic_save(v, flags); \ + v->counter c_op i; \ + cris_atomic_restore(v, flags); \ +} \ + +#define ATOMIC_OP_RETURN(op, c_op) \ +static inline int atomic_##op##_return(int i, volatile atomic_t *v) \ +{ \ + unsigned long flags; \ + int retval; \ + cris_atomic_save(v, flags); \ + retval = (v->counter c_op i); \ + cris_atomic_restore(v, flags); \ + return retval; \ } -static inline void atomic_sub(int i, volatile atomic_t *v) -{ - unsigned long flags; - cris_atomic_save(v, flags); - v->counter -= i; - cris_atomic_restore(v, flags); -} +#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) -static inline int atomic_add_return(int i, volatile atomic_t *v) -{ - unsigned long flags; - int retval; - cris_atomic_save(v, flags); - retval = (v->counter += i); - cris_atomic_restore(v, flags); - return retval; -} +ATOMIC_OPS(add, +=) +ATOMIC_OPS(sub, -=) -#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP -static inline int atomic_sub_return(int i, volatile atomic_t *v) -{ - unsigned long flags; - int retval; - cris_atomic_save(v, flags); - retval = (v->counter -= i); - cris_atomic_restore(v, flags); - return retval; -} +#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) static inline int atomic_sub_and_test(int i, volatile atomic_t *v) { diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index f6c3a1690101..102190a61d65 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -31,7 +31,7 @@ */ #define ATOMIC_INIT(i) { (i) } -#define atomic_read(v) (*(volatile int *)&(v)->counter) +#define atomic_read(v) ACCESS_ONCE((v)->counter) #define atomic_set(v, i) (((v)->counter) = (i)) #ifndef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index de916b11bff5..93d07025f183 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -94,41 +94,47 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new) return __oldval; } -static inline int atomic_add_return(int i, atomic_t *v) -{ - int output; - - __asm__ __volatile__ ( - "1: %0 = memw_locked(%1);\n" - " %0 = add(%0,%2);\n" - " memw_locked(%1,P3)=%0;\n" - " if !P3 jump 1b;\n" - : "=&r" (output) - : "r" (&v->counter), "r" (i) - : "memory", "p3" - ); - return output; - +#define ATOMIC_OP(op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + int output; \ + \ + __asm__ __volatile__ ( \ + "1: %0 = memw_locked(%1);\n" \ + " %0 = "#op "(%0,%2);\n" \ + " memw_locked(%1,P3)=%0;\n" \ + " if !P3 jump 1b;\n" \ + : "=&r" (output) \ + : "r" (&v->counter), "r" (i) \ + : "memory", "p3" \ + ); \ +} \ + +#define ATOMIC_OP_RETURN(op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + int output; \ + \ + __asm__ __volatile__ ( \ + "1: %0 = memw_locked(%1);\n" \ + " %0 = "#op "(%0,%2);\n" \ + " memw_locked(%1,P3)=%0;\n" \ + " if !P3 jump 1b;\n" \ + : "=&r" (output) \ + : "r" (&v->counter), "r" (i) \ + : "memory", "p3" \ + ); \ + return output; \ } -#define atomic_add(i, v) atomic_add_return(i, (v)) +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) -static inline int atomic_sub_return(int i, atomic_t *v) -{ - int output; - __asm__ __volatile__ ( - "1: %0 = memw_locked(%1);\n" - " %0 = sub(%0,%2);\n" - " memw_locked(%1,P3)=%0\n" - " if !P3 jump 1b;\n" - : "=&r" (output) - : "r" (&v->counter), "r" (i) - : "memory", "p3" - ); - return output; -} +ATOMIC_OPS(add) +ATOMIC_OPS(sub) -#define atomic_sub(i, v) atomic_sub_return(i, (v)) +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP /** * __atomic_add_unless - add unless the number is a given value diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 0f8bf48dadf3..0bf03501fe5c 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -21,68 +21,100 @@ #define ATOMIC_INIT(i) { (i) } #define ATOMIC64_INIT(i) { (i) } -#define atomic_read(v) (*(volatile int *)&(v)->counter) -#define atomic64_read(v) (*(volatile long *)&(v)->counter) +#define atomic_read(v) ACCESS_ONCE((v)->counter) +#define atomic64_read(v) ACCESS_ONCE((v)->counter) #define atomic_set(v,i) (((v)->counter) = (i)) #define atomic64_set(v,i) (((v)->counter) = (i)) -static __inline__ int -ia64_atomic_add (int i, atomic_t *v) -{ - __s32 old, new; - CMPXCHG_BUGCHECK_DECL - - do { - CMPXCHG_BUGCHECK(v); - old = atomic_read(v); - new = old + i; - } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); - return new; +#define ATOMIC_OP(op, c_op) \ +static __inline__ int \ +ia64_atomic_##op (int i, atomic_t *v) \ +{ \ + __s32 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = atomic_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \ + return new; \ } -static __inline__ long -ia64_atomic64_add (__s64 i, atomic64_t *v) -{ - __s64 old, new; - CMPXCHG_BUGCHECK_DECL - - do { - CMPXCHG_BUGCHECK(v); - old = atomic64_read(v); - new = old + i; - } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); - return new; -} +ATOMIC_OP(add, +) +ATOMIC_OP(sub, -) -static __inline__ int -ia64_atomic_sub (int i, atomic_t *v) -{ - __s32 old, new; - CMPXCHG_BUGCHECK_DECL - - do { - CMPXCHG_BUGCHECK(v); - old = atomic_read(v); - new = old - i; - } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); - return new; -} +#undef ATOMIC_OP -static __inline__ long -ia64_atomic64_sub (__s64 i, atomic64_t *v) -{ - __s64 old, new; - CMPXCHG_BUGCHECK_DECL - - do { - CMPXCHG_BUGCHECK(v); - old = atomic64_read(v); - new = old - i; - } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); - return new; +#define atomic_add_return(i,v) \ +({ \ + int __ia64_aar_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ + || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ + || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ + || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ + : ia64_atomic_add(__ia64_aar_i, v); \ +}) + +#define atomic_sub_return(i,v) \ +({ \ + int __ia64_asr_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ + || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ + || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ + || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ + : ia64_atomic_sub(__ia64_asr_i, v); \ +}) + +#define ATOMIC64_OP(op, c_op) \ +static __inline__ long \ +ia64_atomic64_##op (__s64 i, atomic64_t *v) \ +{ \ + __s64 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = atomic64_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \ + return new; \ } +ATOMIC64_OP(add, +) +ATOMIC64_OP(sub, -) + +#undef ATOMIC64_OP + +#define atomic64_add_return(i,v) \ +({ \ + long __ia64_aar_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ + || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ + || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ + || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ + : ia64_atomic64_add(__ia64_aar_i, v); \ +}) + +#define atomic64_sub_return(i,v) \ +({ \ + long __ia64_asr_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ + || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ + || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ + || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ + : ia64_atomic64_sub(__ia64_asr_i, v); \ +}) + #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new)) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) @@ -123,30 +155,6 @@ static __inline__ long atomic64_add_unless(atomic64_t *v, long a, long u) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) -#define atomic_add_return(i,v) \ -({ \ - int __ia64_aar_i = (i); \ - (__builtin_constant_p(i) \ - && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ - || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ - || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ - || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ - ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ - : ia64_atomic_add(__ia64_aar_i, v); \ -}) - -#define atomic64_add_return(i,v) \ -({ \ - long __ia64_aar_i = (i); \ - (__builtin_constant_p(i) \ - && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ - || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ - || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ - || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ - ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ - : ia64_atomic64_add(__ia64_aar_i, v); \ -}) - /* * Atomically add I to V and return TRUE if the resulting value is * negative. @@ -163,30 +171,6 @@ atomic64_add_negative (__s64 i, atomic64_t *v) return atomic64_add_return(i, v) < 0; } -#define atomic_sub_return(i,v) \ -({ \ - int __ia64_asr_i = (i); \ - (__builtin_constant_p(i) \ - && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ - || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ - || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ - || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ - ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ - : ia64_atomic_sub(__ia64_asr_i, v); \ -}) - -#define atomic64_sub_return(i,v) \ -({ \ - long __ia64_asr_i = (i); \ - (__builtin_constant_p(i) \ - && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ - || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ - || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ - || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ - ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ - : ia64_atomic64_sub(__ia64_asr_i, v); \ -}) - #define atomic_dec_return(v) atomic_sub_return(1, (v)) #define atomic_inc_return(v) atomic_add_return(1, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1, (v)) @@ -199,13 +183,13 @@ atomic64_add_negative (__s64 i, atomic64_t *v) #define atomic64_dec_and_test(v) (atomic64_sub_return(1, (v)) == 0) #define atomic64_inc_and_test(v) (atomic64_add_return(1, (v)) == 0) -#define atomic_add(i,v) atomic_add_return((i), (v)) -#define atomic_sub(i,v) atomic_sub_return((i), (v)) +#define atomic_add(i,v) (void)atomic_add_return((i), (v)) +#define atomic_sub(i,v) (void)atomic_sub_return((i), (v)) #define atomic_inc(v) atomic_add(1, (v)) #define atomic_dec(v) atomic_sub(1, (v)) -#define atomic64_add(i,v) atomic64_add_return((i), (v)) -#define atomic64_sub(i,v) atomic64_sub_return((i), (v)) +#define atomic64_add(i,v) (void)atomic64_add_return((i), (v)) +#define atomic64_sub(i,v) (void)atomic64_sub_return((i), (v)) #define atomic64_inc(v) atomic64_add(1, (v)) #define atomic64_dec(v) atomic64_sub(1, (v)) diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index c7367130ab14..ce53c50d0ba4 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -19,7 +19,6 @@ #include <asm/ptrace.h> #include <asm/ustack.h> -#define __ARCH_WANT_UNLOCKED_CTXSW #define ARCH_HAS_PREFETCH_SWITCH_STACK #define IA64_NUM_PHYS_STACK_REG 96 diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index 8ad0ed4182a5..31bb74adba08 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h @@ -28,7 +28,7 @@ * * Atomically reads the value of @v. */ -#define atomic_read(v) (*(volatile int *)&(v)->counter) +#define atomic_read(v) ACCESS_ONCE((v)->counter) /** * atomic_set - set atomic variable @@ -39,85 +39,64 @@ */ #define atomic_set(v,i) (((v)->counter) = (i)) -/** - * atomic_add_return - add integer to atomic variable and return it - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v and return (@i + @v). - */ -static __inline__ int atomic_add_return(int i, atomic_t *v) -{ - unsigned long flags; - int result; - - local_irq_save(flags); - __asm__ __volatile__ ( - "# atomic_add_return \n\t" - DCACHE_CLEAR("%0", "r4", "%1") - M32R_LOCK" %0, @%1; \n\t" - "add %0, %2; \n\t" - M32R_UNLOCK" %0, @%1; \n\t" - : "=&r" (result) - : "r" (&v->counter), "r" (i) - : "memory" #ifdef CONFIG_CHIP_M32700_TS1 - , "r4" -#endif /* CONFIG_CHIP_M32700_TS1 */ - ); - local_irq_restore(flags); - - return result; +#define __ATOMIC_CLOBBER , "r4" +#else +#define __ATOMIC_CLOBBER +#endif + +#define ATOMIC_OP(op) \ +static __inline__ void atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int result; \ + \ + local_irq_save(flags); \ + __asm__ __volatile__ ( \ + "# atomic_" #op " \n\t" \ + DCACHE_CLEAR("%0", "r4", "%1") \ + M32R_LOCK" %0, @%1; \n\t" \ + #op " %0, %2; \n\t" \ + M32R_UNLOCK" %0, @%1; \n\t" \ + : "=&r" (result) \ + : "r" (&v->counter), "r" (i) \ + : "memory" \ + __ATOMIC_CLOBBER \ + ); \ + local_irq_restore(flags); \ +} \ + +#define ATOMIC_OP_RETURN(op) \ +static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int result; \ + \ + local_irq_save(flags); \ + __asm__ __volatile__ ( \ + "# atomic_" #op "_return \n\t" \ + DCACHE_CLEAR("%0", "r4", "%1") \ + M32R_LOCK" %0, @%1; \n\t" \ + #op " %0, %2; \n\t" \ + M32R_UNLOCK" %0, @%1; \n\t" \ + : "=&r" (result) \ + : "r" (&v->counter), "r" (i) \ + : "memory" \ + __ATOMIC_CLOBBER \ + ); \ + local_irq_restore(flags); \ + \ + return result; \ } -/** - * atomic_sub_return - subtract integer from atomic variable and return it - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v and return (@v - @i). - */ -static __inline__ int atomic_sub_return(int i, atomic_t *v) -{ - unsigned long flags; - int result; - - local_irq_save(flags); - __asm__ __volatile__ ( - "# atomic_sub_return \n\t" - DCACHE_CLEAR("%0", "r4", "%1") - M32R_LOCK" %0, @%1; \n\t" - "sub %0, %2; \n\t" - M32R_UNLOCK" %0, @%1; \n\t" - : "=&r" (result) - : "r" (&v->counter), "r" (i) - : "memory" -#ifdef CONFIG_CHIP_M32700_TS1 - , "r4" -#endif /* CONFIG_CHIP_M32700_TS1 */ - ); - local_irq_restore(flags); - - return result; -} +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) -/** - * atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v. - */ -#define atomic_add(i,v) ((void) atomic_add_return((i), (v))) +ATOMIC_OPS(add) +ATOMIC_OPS(sub) -/** - * atomic_sub - subtract the atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v. - */ -#define atomic_sub(i,v) ((void) atomic_sub_return((i), (v))) +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP /** * atomic_sub_and_test - subtract value from variable and test result @@ -151,9 +130,7 @@ static __inline__ int atomic_inc_return(atomic_t *v) : "=&r" (result) : "r" (&v->counter) : "memory" -#ifdef CONFIG_CHIP_M32700_TS1 - , "r4" -#endif /* CONFIG_CHIP_M32700_TS1 */ + __ATOMIC_CLOBBER ); local_irq_restore(flags); @@ -181,9 +158,7 @@ static __inline__ int atomic_dec_return(atomic_t *v) : "=&r" (result) : "r" (&v->counter) : "memory" -#ifdef CONFIG_CHIP_M32700_TS1 - , "r4" -#endif /* CONFIG_CHIP_M32700_TS1 */ + __ATOMIC_CLOBBER ); local_irq_restore(flags); @@ -280,9 +255,7 @@ static __inline__ void atomic_clear_mask(unsigned long mask, atomic_t *addr) : "=&r" (tmp) : "r" (addr), "r" (~mask) : "memory" -#ifdef CONFIG_CHIP_M32700_TS1 - , "r5" -#endif /* CONFIG_CHIP_M32700_TS1 */ + __ATOMIC_CLOBBER ); local_irq_restore(flags); } @@ -302,9 +275,7 @@ static __inline__ void atomic_set_mask(unsigned long mask, atomic_t *addr) : "=&r" (tmp) : "r" (addr), "r" (mask) : "memory" -#ifdef CONFIG_CHIP_M32700_TS1 - , "r5" -#endif /* CONFIG_CHIP_M32700_TS1 */ + __ATOMIC_CLOBBER ); local_irq_restore(flags); } diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 55695212a2ae..e85f047fb072 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -17,7 +17,7 @@ #define ATOMIC_INIT(i) { (i) } -#define atomic_read(v) (*(volatile int *)&(v)->counter) +#define atomic_read(v) ACCESS_ONCE((v)->counter) #define atomic_set(v, i) (((v)->counter) = i) /* @@ -30,16 +30,57 @@ #define ASM_DI "di" #endif -static inline void atomic_add(int i, atomic_t *v) -{ - __asm__ __volatile__("addl %1,%0" : "+m" (*v) : ASM_DI (i)); +#define ATOMIC_OP(op, c_op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + __asm__ __volatile__(#asm_op "l %1,%0" : "+m" (*v) : ASM_DI (i));\ +} \ + +#ifdef CONFIG_RMW_INSNS + +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + int t, tmp; \ + \ + __asm__ __volatile__( \ + "1: movel %2,%1\n" \ + " " #asm_op "l %3,%1\n" \ + " casl %2,%1,%0\n" \ + " jne 1b" \ + : "+m" (*v), "=&d" (t), "=&d" (tmp) \ + : "g" (i), "2" (atomic_read(v))); \ + return t; \ } -static inline void atomic_sub(int i, atomic_t *v) -{ - __asm__ __volatile__("subl %1,%0" : "+m" (*v) : ASM_DI (i)); +#else + +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ +static inline int atomic_##op##_return(int i, atomic_t * v) \ +{ \ + unsigned long flags; \ + int t; \ + \ + local_irq_save(flags); \ + t = (v->counter c_op i); \ + local_irq_restore(flags); \ + \ + return t; \ } +#endif /* CONFIG_RMW_INSNS */ + +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_OP_RETURN(op, c_op, asm_op) + +ATOMIC_OPS(add, +=, add) +ATOMIC_OPS(sub, -=, sub) + +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + static inline void atomic_inc(atomic_t *v) { __asm__ __volatile__("addql #1,%0" : "+m" (*v)); @@ -76,67 +117,11 @@ static inline int atomic_inc_and_test(atomic_t *v) #ifdef CONFIG_RMW_INSNS -static inline int atomic_add_return(int i, atomic_t *v) -{ - int t, tmp; - - __asm__ __volatile__( - "1: movel %2,%1\n" - " addl %3,%1\n" - " casl %2,%1,%0\n" - " jne 1b" - : "+m" (*v), "=&d" (t), "=&d" (tmp) - : "g" (i), "2" (atomic_read(v))); - return t; -} - -static inline int atomic_sub_return(int i, atomic_t *v) -{ - int t, tmp; - - __asm__ __volatile__( - "1: movel %2,%1\n" - " subl %3,%1\n" - " casl %2,%1,%0\n" - " jne 1b" - : "+m" (*v), "=&d" (t), "=&d" (tmp) - : "g" (i), "2" (atomic_read(v))); - return t; -} - #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) #else /* !CONFIG_RMW_INSNS */ -static inline int atomic_add_return(int i, atomic_t * v) -{ - unsigned long flags; - int t; - - local_irq_save(flags); - t = atomic_read(v); - t += i; - atomic_set(v, t); - local_irq_restore(flags); - - return t; -} - -static inline int atomic_sub_return(int i, atomic_t * v) -{ - unsigned long flags; - int t; - - local_irq_save(flags); - t = atomic_read(v); - t -= i; - atomic_set(v, t); - local_irq_restore(flags); - - return t; -} - static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { unsigned long flags; diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h index d2e60a18986c..948d8688643c 100644 --- a/arch/metag/include/asm/atomic_lnkget.h +++ b/arch/metag/include/asm/atomic_lnkget.h @@ -27,85 +27,56 @@ static inline int atomic_read(const atomic_t *v) return temp; } -static inline void atomic_add(int i, atomic_t *v) -{ - int temp; - - asm volatile ( - "1: LNKGETD %0, [%1]\n" - " ADD %0, %0, %2\n" - " LNKSETD [%1], %0\n" - " DEFR %0, TXSTAT\n" - " ANDT %0, %0, #HI(0x3f000000)\n" - " CMPT %0, #HI(0x02000000)\n" - " BNZ 1b\n" - : "=&d" (temp) - : "da" (&v->counter), "bd" (i) - : "cc"); +#define ATOMIC_OP(op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + int temp; \ + \ + asm volatile ( \ + "1: LNKGETD %0, [%1]\n" \ + " " #op " %0, %0, %2\n" \ + " LNKSETD [%1], %0\n" \ + " DEFR %0, TXSTAT\n" \ + " ANDT %0, %0, #HI(0x3f000000)\n" \ + " CMPT %0, #HI(0x02000000)\n" \ + " BNZ 1b\n" \ + : "=&d" (temp) \ + : "da" (&v->counter), "bd" (i) \ + : "cc"); \ +} \ + +#define ATOMIC_OP_RETURN(op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + int result, temp; \ + \ + smp_mb(); \ + \ + asm volatile ( \ + "1: LNKGETD %1, [%2]\n" \ + " " #op " %1, %1, %3\n" \ + " LNKSETD [%2], %1\n" \ + " DEFR %0, TXSTAT\n" \ + " ANDT %0, %0, #HI(0x3f000000)\n" \ + " CMPT %0, #HI(0x02000000)\n" \ + " BNZ 1b\n" \ + : "=&d" (temp), "=&da" (result) \ + : "da" (&v->counter), "bd" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return result; \ } -static inline void atomic_sub(int i, atomic_t *v) -{ - int temp; +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) - asm volatile ( - "1: LNKGETD %0, [%1]\n" - " SUB %0, %0, %2\n" - " LNKSETD [%1], %0\n" - " DEFR %0, TXSTAT\n" - " ANDT %0, %0, #HI(0x3f000000)\n" - " CMPT %0, #HI(0x02000000)\n" - " BNZ 1b\n" - : "=&d" (temp) - : "da" (&v->counter), "bd" (i) - : "cc"); -} +ATOMIC_OPS(add) +ATOMIC_OPS(sub) -static inline int atomic_add_return(int i, atomic_t *v) -{ - int result, temp; - - smp_mb(); - - asm volatile ( - "1: LNKGETD %1, [%2]\n" - " ADD %1, %1, %3\n" - " LNKSETD [%2], %1\n" - " DEFR %0, TXSTAT\n" - " ANDT %0, %0, #HI(0x3f000000)\n" - " CMPT %0, #HI(0x02000000)\n" - " BNZ 1b\n" - : "=&d" (temp), "=&da" (result) - : "da" (&v->counter), "bd" (i) - : "cc"); - - smp_mb(); - - return result; -} - -static inline int atomic_sub_return(int i, atomic_t *v) -{ - int result, temp; - - smp_mb(); - - asm volatile ( - "1: LNKGETD %1, [%2]\n" - " SUB %1, %1, %3\n" - " LNKSETD [%2], %1\n" - " DEFR %0, TXSTAT\n" - " ANDT %0, %0, #HI(0x3f000000)\n" - " CMPT %0, #HI(0x02000000)\n" - " BNZ 1b\n" - : "=&d" (temp), "=&da" (result) - : "da" (&v->counter), "bd" (i) - : "cc"); - - smp_mb(); - - return result; -} +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP static inline void atomic_clear_mask(unsigned int mask, atomic_t *v) { diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h index e578955e674b..f5d5898c1020 100644 --- a/arch/metag/include/asm/atomic_lock1.h +++ b/arch/metag/include/asm/atomic_lock1.h @@ -37,55 +37,41 @@ static inline int atomic_set(atomic_t *v, int i) return i; } -static inline void atomic_add(int i, atomic_t *v) -{ - unsigned long flags; - - __global_lock1(flags); - fence(); - v->counter += i; - __global_unlock1(flags); +#define ATOMIC_OP(op, c_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + \ + __global_lock1(flags); \ + fence(); \ + v->counter c_op i; \ + __global_unlock1(flags); \ +} \ + +#define ATOMIC_OP_RETURN(op, c_op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned long result; \ + unsigned long flags; \ + \ + __global_lock1(flags); \ + result = v->counter; \ + result c_op i; \ + fence(); \ + v->counter = result; \ + __global_unlock1(flags); \ + \ + return result; \ } -static inline void atomic_sub(int i, atomic_t *v) -{ - unsigned long flags; +#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) - __global_lock1(flags); - fence(); - v->counter -= i; - __global_unlock1(flags); -} - -static inline int atomic_add_return(int i, atomic_t *v) -{ - unsigned long result; - unsigned long flags; +ATOMIC_OPS(add, +=) +ATOMIC_OPS(sub, -=) - __global_lock1(flags); - result = v->counter; - result += i; - fence(); - v->counter = result; - __global_unlock1(flags); - - return result; -} - -static inline int atomic_sub_return(int i, atomic_t *v) -{ - unsigned long result; - unsigned long flags; - - __global_lock1(flags); - result = v->counter; - result -= i; - fence(); - v->counter = result; - __global_unlock1(flags); - - return result; -} +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP static inline void atomic_clear_mask(unsigned int mask, atomic_t *v) { diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 37b2befe651a..6dd6bfc607e9 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -29,7 +29,7 @@ * * Atomically reads the value of @v. */ -#define atomic_read(v) (*(volatile int *)&(v)->counter) +#define atomic_read(v) ACCESS_ONCE((v)->counter) /* * atomic_set - set atomic variable @@ -40,195 +40,103 @@ */ #define atomic_set(v, i) ((v)->counter = (i)) -/* - * atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v. - */ -static __inline__ void atomic_add(int i, atomic_t * v) -{ - if (kernel_uses_llsc && R10000_LLSC_WAR) { - int temp; - - __asm__ __volatile__( - " .set arch=r4000 \n" - "1: ll %0, %1 # atomic_add \n" - " addu %0, %2 \n" - " sc %0, %1 \n" - " beqzl %0, 1b \n" - " .set mips0 \n" - : "=&r" (temp), "+m" (v->counter) - : "Ir" (i)); - } else if (kernel_uses_llsc) { - int temp; - - do { - __asm__ __volatile__( - " .set arch=r4000 \n" - " ll %0, %1 # atomic_add \n" - " addu %0, %2 \n" - " sc %0, %1 \n" - " .set mips0 \n" - : "=&r" (temp), "+m" (v->counter) - : "Ir" (i)); - } while (unlikely(!temp)); - } else { - unsigned long flags; - - raw_local_irq_save(flags); - v->counter += i; - raw_local_irq_restore(flags); - } -} - -/* - * atomic_sub - subtract the atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v. - */ -static __inline__ void atomic_sub(int i, atomic_t * v) -{ - if (kernel_uses_llsc && R10000_LLSC_WAR) { - int temp; - - __asm__ __volatile__( - " .set arch=r4000 \n" - "1: ll %0, %1 # atomic_sub \n" - " subu %0, %2 \n" - " sc %0, %1 \n" - " beqzl %0, 1b \n" - " .set mips0 \n" - : "=&r" (temp), "+m" (v->counter) - : "Ir" (i)); - } else if (kernel_uses_llsc) { - int temp; - - do { - __asm__ __volatile__( - " .set arch=r4000 \n" - " ll %0, %1 # atomic_sub \n" - " subu %0, %2 \n" - " sc %0, %1 \n" - " .set mips0 \n" - : "=&r" (temp), "+m" (v->counter) - : "Ir" (i)); - } while (unlikely(!temp)); - } else { - unsigned long flags; - - raw_local_irq_save(flags); - v->counter -= i; - raw_local_irq_restore(flags); - } -} - -/* - * Same as above, but return the result value - */ -static __inline__ int atomic_add_return(int i, atomic_t * v) -{ - int result; - - smp_mb__before_llsc(); - - if (kernel_uses_llsc && R10000_LLSC_WAR) { - int temp; - - __asm__ __volatile__( - " .set arch=r4000 \n" - "1: ll %1, %2 # atomic_add_return \n" - " addu %0, %1, %3 \n" - " sc %0, %2 \n" - " beqzl %0, 1b \n" - " addu %0, %1, %3 \n" - " .set mips0 \n" - : "=&r" (result), "=&r" (temp), "+m" (v->counter) - : "Ir" (i)); - } else if (kernel_uses_llsc) { - int temp; - - do { - __asm__ __volatile__( - " .set arch=r4000 \n" - " ll %1, %2 # atomic_add_return \n" - " addu %0, %1, %3 \n" - " sc %0, %2 \n" - " .set mips0 \n" - : "=&r" (result), "=&r" (temp), "+m" (v->counter) - : "Ir" (i)); - } while (unlikely(!result)); - - result = temp + i; - } else { - unsigned long flags; - - raw_local_irq_save(flags); - result = v->counter; - result += i; - v->counter = result; - raw_local_irq_restore(flags); - } - - smp_llsc_mb(); - - return result; +#define ATOMIC_OP(op, c_op, asm_op) \ +static __inline__ void atomic_##op(int i, atomic_t * v) \ +{ \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ + int temp; \ + \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + "1: ll %0, %1 # atomic_" #op " \n" \ + " " #asm_op " %0, %2 \n" \ + " sc %0, %1 \n" \ + " beqzl %0, 1b \n" \ + " .set mips0 \n" \ + : "=&r" (temp), "+m" (v->counter) \ + : "Ir" (i)); \ + } else if (kernel_uses_llsc) { \ + int temp; \ + \ + do { \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + " ll %0, %1 # atomic_" #op "\n" \ + " " #asm_op " %0, %2 \n" \ + " sc %0, %1 \n" \ + " .set mips0 \n" \ + : "=&r" (temp), "+m" (v->counter) \ + : "Ir" (i)); \ + } while (unlikely(!temp)); \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + } \ +} \ + +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ +static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ +{ \ + int result; \ + \ + smp_mb__before_llsc(); \ + \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ + int temp; \ + \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + "1: ll %1, %2 # atomic_" #op "_return \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + " beqzl %0, 1b \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), "+m" (v->counter) \ + : "Ir" (i)); \ + } else if (kernel_uses_llsc) { \ + int temp; \ + \ + do { \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + " ll %1, %2 # atomic_" #op "_return \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), "+m" (v->counter) \ + : "Ir" (i)); \ + } while (unlikely(!result)); \ + \ + result = temp; result c_op i; \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + result c_op i; \ + v->counter = result; \ + raw_local_irq_restore(flags); \ + } \ + \ + smp_llsc_mb(); \ + \ + return result; \ } -static __inline__ int atomic_sub_return(int i, atomic_t * v) -{ - int result; +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_OP_RETURN(op, c_op, asm_op) - smp_mb__before_llsc(); +ATOMIC_OPS(add, +=, addu) +ATOMIC_OPS(sub, -=, subu) - if (kernel_uses_llsc && R10000_LLSC_WAR) { - int temp; - - __asm__ __volatile__( - " .set arch=r4000 \n" - "1: ll %1, %2 # atomic_sub_return \n" - " subu %0, %1, %3 \n" - " sc %0, %2 \n" - " beqzl %0, 1b \n" - " subu %0, %1, %3 \n" - " .set mips0 \n" - : "=&r" (result), "=&r" (temp), "=m" (v->counter) - : "Ir" (i), "m" (v->counter) - : "memory"); - - result = temp - i; - } else if (kernel_uses_llsc) { - int temp; - - do { - __asm__ __volatile__( - " .set arch=r4000 \n" - " ll %1, %2 # atomic_sub_return \n" - " subu %0, %1, %3 \n" - " sc %0, %2 \n" - " .set mips0 \n" - : "=&r" (result), "=&r" (temp), "+m" (v->counter) - : "Ir" (i)); - } while (unlikely(!result)); - - result = temp - i; - } else { - unsigned long flags; - - raw_local_irq_save(flags); - result = v->counter; - result -= i; - v->counter = result; - raw_local_irq_restore(flags); - } - - smp_llsc_mb(); - - return result; -} +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP /* * atomic_sub_if_positive - conditionally subtract integer from atomic variable @@ -398,7 +306,7 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) * @v: pointer of type atomic64_t * */ -#define atomic64_read(v) (*(volatile long *)&(v)->counter) +#define atomic64_read(v) ACCESS_ONCE((v)->counter) /* * atomic64_set - set atomic variable @@ -407,195 +315,104 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) */ #define atomic64_set(v, i) ((v)->counter = (i)) -/* - * atomic64_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type atomic64_t - * - * Atomically adds @i to @v. - */ -static __inline__ void atomic64_add(long i, atomic64_t * v) -{ - if (kernel_uses_llsc && R10000_LLSC_WAR) { - long temp; - - __asm__ __volatile__( - " .set arch=r4000 \n" - "1: lld %0, %1 # atomic64_add \n" - " daddu %0, %2 \n" - " scd %0, %1 \n" - " beqzl %0, 1b \n" - " .set mips0 \n" - : "=&r" (temp), "+m" (v->counter) - : "Ir" (i)); - } else if (kernel_uses_llsc) { - long temp; - - do { - __asm__ __volatile__( - " .set arch=r4000 \n" - " lld %0, %1 # atomic64_add \n" - " daddu %0, %2 \n" - " scd %0, %1 \n" - " .set mips0 \n" - : "=&r" (temp), "+m" (v->counter) - : "Ir" (i)); - } while (unlikely(!temp)); - } else { - unsigned long flags; - - raw_local_irq_save(flags); - v->counter += i; - raw_local_irq_restore(flags); - } +#define ATOMIC64_OP(op, c_op, asm_op) \ +static __inline__ void atomic64_##op(long i, atomic64_t * v) \ +{ \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ + long temp; \ + \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + "1: lld %0, %1 # atomic64_" #op " \n" \ + " " #asm_op " %0, %2 \n" \ + " scd %0, %1 \n" \ + " beqzl %0, 1b \n" \ + " .set mips0 \n" \ + : "=&r" (temp), "+m" (v->counter) \ + : "Ir" (i)); \ + } else if (kernel_uses_llsc) { \ + long temp; \ + \ + do { \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + " lld %0, %1 # atomic64_" #op "\n" \ + " " #asm_op " %0, %2 \n" \ + " scd %0, %1 \n" \ + " .set mips0 \n" \ + : "=&r" (temp), "+m" (v->counter) \ + : "Ir" (i)); \ + } while (unlikely(!temp)); \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + } \ +} \ + +#define ATOMIC64_OP_RETURN(op, c_op, asm_op) \ +static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ +{ \ + long result; \ + \ + smp_mb__before_llsc(); \ + \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ + long temp; \ + \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + "1: lld %1, %2 # atomic64_" #op "_return\n" \ + " " #asm_op " %0, %1, %3 \n" \ + " scd %0, %2 \n" \ + " beqzl %0, 1b \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), "+m" (v->counter) \ + : "Ir" (i)); \ + } else if (kernel_uses_llsc) { \ + long temp; \ + \ + do { \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + " lld %1, %2 # atomic64_" #op "_return\n" \ + " " #asm_op " %0, %1, %3 \n" \ + " scd %0, %2 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), "=m" (v->counter) \ + : "Ir" (i), "m" (v->counter) \ + : "memory"); \ + } while (unlikely(!result)); \ + \ + result = temp; result c_op i; \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + result c_op i; \ + v->counter = result; \ + raw_local_irq_restore(flags); \ + } \ + \ + smp_llsc_mb(); \ + \ + return result; \ } -/* - * atomic64_sub - subtract the atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic64_t - * - * Atomically subtracts @i from @v. - */ -static __inline__ void atomic64_sub(long i, atomic64_t * v) -{ - if (kernel_uses_llsc && R10000_LLSC_WAR) { - long temp; - - __asm__ __volatile__( - " .set arch=r4000 \n" - "1: lld %0, %1 # atomic64_sub \n" - " dsubu %0, %2 \n" - " scd %0, %1 \n" - " beqzl %0, 1b \n" - " .set mips0 \n" - : "=&r" (temp), "+m" (v->counter) - : "Ir" (i)); - } else if (kernel_uses_llsc) { - long temp; - - do { - __asm__ __volatile__( - " .set arch=r4000 \n" - " lld %0, %1 # atomic64_sub \n" - " dsubu %0, %2 \n" - " scd %0, %1 \n" - " .set mips0 \n" - : "=&r" (temp), "+m" (v->counter) - : "Ir" (i)); - } while (unlikely(!temp)); - } else { - unsigned long flags; - - raw_local_irq_save(flags); - v->counter -= i; - raw_local_irq_restore(flags); - } -} - -/* - * Same as above, but return the result value - */ -static __inline__ long atomic64_add_return(long i, atomic64_t * v) -{ - long result; +#define ATOMIC64_OPS(op, c_op, asm_op) \ + ATOMIC64_OP(op, c_op, asm_op) \ + ATOMIC64_OP_RETURN(op, c_op, asm_op) - smp_mb__before_llsc(); +ATOMIC64_OPS(add, +=, daddu) +ATOMIC64_OPS(sub, -=, dsubu) - if (kernel_uses_llsc && R10000_LLSC_WAR) { - long temp; - - __asm__ __volatile__( - " .set arch=r4000 \n" - "1: lld %1, %2 # atomic64_add_return \n" - " daddu %0, %1, %3 \n" - " scd %0, %2 \n" - " beqzl %0, 1b \n" - " daddu %0, %1, %3 \n" - " .set mips0 \n" - : "=&r" (result), "=&r" (temp), "+m" (v->counter) - : "Ir" (i)); - } else if (kernel_uses_llsc) { - long temp; - - do { - __asm__ __volatile__( - " .set arch=r4000 \n" - " lld %1, %2 # atomic64_add_return \n" - " daddu %0, %1, %3 \n" - " scd %0, %2 \n" - " .set mips0 \n" - : "=&r" (result), "=&r" (temp), "=m" (v->counter) - : "Ir" (i), "m" (v->counter) - : "memory"); - } while (unlikely(!result)); - - result = temp + i; - } else { - unsigned long flags; - - raw_local_irq_save(flags); - result = v->counter; - result += i; - v->counter = result; - raw_local_irq_restore(flags); - } - - smp_llsc_mb(); - - return result; -} - -static __inline__ long atomic64_sub_return(long i, atomic64_t * v) -{ - long result; - - smp_mb__before_llsc(); - - if (kernel_uses_llsc && R10000_LLSC_WAR) { - long temp; - - __asm__ __volatile__( - " .set arch=r4000 \n" - "1: lld %1, %2 # atomic64_sub_return \n" - " dsubu %0, %1, %3 \n" - " scd %0, %2 \n" - " beqzl %0, 1b \n" - " dsubu %0, %1, %3 \n" - " .set mips0 \n" - : "=&r" (result), "=&r" (temp), "=m" (v->counter) - : "Ir" (i), "m" (v->counter) - : "memory"); - } else if (kernel_uses_llsc) { - long temp; - - do { - __asm__ __volatile__( - " .set arch=r4000 \n" - " lld %1, %2 # atomic64_sub_return \n" - " dsubu %0, %1, %3 \n" - " scd %0, %2 \n" - " .set mips0 \n" - : "=&r" (result), "=&r" (temp), "=m" (v->counter) - : "Ir" (i), "m" (v->counter) - : "memory"); - } while (unlikely(!result)); - - result = temp - i; - } else { - unsigned long flags; - - raw_local_irq_save(flags); - result = v->counter; - result -= i; - v->counter = result; - raw_local_irq_restore(flags); - } - - smp_llsc_mb(); - - return result; -} +#undef ATOMIC64_OPS +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP /* * atomic64_sub_if_positive - conditionally subtract integer from atomic variable diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 05f08438a7c4..f1df4cb4a286 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -397,12 +397,6 @@ unsigned long get_wchan(struct task_struct *p); #define ARCH_HAS_PREFETCHW #define prefetchw(x) __builtin_prefetch((x), 1, 1) -/* - * See Documentation/scheduler/sched-arch.txt; prevents deadlock on SMP - * systems. - */ -#define __ARCH_WANT_UNLOCKED_CTXSW - #endif #endif /* _ASM_PROCESSOR_H */ diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 645b3c4fcfba..f7aac5b57b4b 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -770,7 +770,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) long ret = 0; user_exit(); - if (secure_computing(syscall) == -1) + if (secure_computing() == -1) return -1; if (test_thread_flag(TIF_SYSCALL_TRACE) && diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index cadeb1e2cdfc..5be655e83e70 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h @@ -33,7 +33,6 @@ * @v: pointer of type atomic_t * * Atomically reads the value of @v. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. */ #define atomic_read(v) (ACCESS_ONCE((v)->counter)) @@ -43,102 +42,62 @@ * @i: required value * * Atomically sets the value of @v to @i. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. */ #define atomic_set(v, i) (((v)->counter) = (i)) -/** - * atomic_add_return - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v and returns the result - * Note that the guaranteed useful range of an atomic_t is only 24 bits. - */ -static inline int atomic_add_return(int i, atomic_t *v) -{ - int retval; -#ifdef CONFIG_SMP - int status; - - asm volatile( - "1: mov %4,(_AAR,%3) \n" - " mov (_ADR,%3),%1 \n" - " add %5,%1 \n" - " mov %1,(_ADR,%3) \n" - " mov (_ADR,%3),%0 \n" /* flush */ - " mov (_ASR,%3),%0 \n" - " or %0,%0 \n" - " bne 1b \n" - : "=&r"(status), "=&r"(retval), "=m"(v->counter) - : "a"(ATOMIC_OPS_BASE_ADDR), "r"(&v->counter), "r"(i) - : "memory", "cc"); - -#else - unsigned long flags; +#define ATOMIC_OP(op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + int retval, status; \ + \ + asm volatile( \ + "1: mov %4,(_AAR,%3) \n" \ + " mov (_ADR,%3),%1 \n" \ + " " #op " %5,%1 \n" \ + " mov %1,(_ADR,%3) \n" \ + " mov (_ADR,%3),%0 \n" /* flush */ \ + " mov (_ASR,%3),%0 \n" \ + " or %0,%0 \n" \ + " bne 1b \n" \ + : "=&r"(status), "=&r"(retval), "=m"(v->counter) \ + : "a"(ATOMIC_OPS_BASE_ADDR), "r"(&v->counter), "r"(i) \ + : "memory", "cc"); \ +} - flags = arch_local_cli_save(); - retval = v->counter; - retval += i; - v->counter = retval; - arch_local_irq_restore(flags); -#endif - return retval; +#define ATOMIC_OP_RETURN(op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + int retval, status; \ + \ + asm volatile( \ + "1: mov %4,(_AAR,%3) \n" \ + " mov (_ADR,%3),%1 \n" \ + " " #op " %5,%1 \n" \ + " mov %1,(_ADR,%3) \n" \ + " mov (_ADR,%3),%0 \n" /* flush */ \ + " mov (_ASR,%3),%0 \n" \ + " or %0,%0 \n" \ + " bne 1b \n" \ + : "=&r"(status), "=&r"(retval), "=m"(v->counter) \ + : "a"(ATOMIC_OPS_BASE_ADDR), "r"(&v->counter), "r"(i) \ + : "memory", "cc"); \ + return retval; \ } -/** - * atomic_sub_return - subtract integer from atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v and returns the result - * Note that the guaranteed useful range of an atomic_t is only 24 bits. - */ -static inline int atomic_sub_return(int i, atomic_t *v) -{ - int retval; -#ifdef CONFIG_SMP - int status; +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) - asm volatile( - "1: mov %4,(_AAR,%3) \n" - " mov (_ADR,%3),%1 \n" - " sub %5,%1 \n" - " mov %1,(_ADR,%3) \n" - " mov (_ADR,%3),%0 \n" /* flush */ - " mov (_ASR,%3),%0 \n" - " or %0,%0 \n" - " bne 1b \n" - : "=&r"(status), "=&r"(retval), "=m"(v->counter) - : "a"(ATOMIC_OPS_BASE_ADDR), "r"(&v->counter), "r"(i) - : "memory", "cc"); +ATOMIC_OPS(add) +ATOMIC_OPS(sub) -#else - unsigned long flags; - flags = arch_local_cli_save(); - retval = v->counter; - retval -= i; - v->counter = retval; - arch_local_irq_restore(flags); -#endif - return retval; -} +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP static inline int atomic_add_negative(int i, atomic_t *v) { return atomic_add_return(i, v) < 0; } -static inline void atomic_add(int i, atomic_t *v) -{ - atomic_add_return(i, v); -} - -static inline void atomic_sub(int i, atomic_t *v) -{ - atomic_sub_return(i, v); -} - static inline void atomic_inc(atomic_t *v) { atomic_add_return(1, v); diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 0be2db2c7d44..226f8ca993f6 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -55,24 +55,7 @@ extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned; * are atomic, so a reader never sees inconsistent values. */ -/* It's possible to reduce all atomic operations to either - * __atomic_add_return, atomic_set and atomic_read (the latter - * is there only for consistency). - */ - -static __inline__ int __atomic_add_return(int i, atomic_t *v) -{ - int ret; - unsigned long flags; - _atomic_spin_lock_irqsave(v, flags); - - ret = (v->counter += i); - - _atomic_spin_unlock_irqrestore(v, flags); - return ret; -} - -static __inline__ void atomic_set(atomic_t *v, int i) +static __inline__ void atomic_set(atomic_t *v, int i) { unsigned long flags; _atomic_spin_lock_irqsave(v, flags); @@ -84,7 +67,7 @@ static __inline__ void atomic_set(atomic_t *v, int i) static __inline__ int atomic_read(const atomic_t *v) { - return (*(volatile int *)&(v)->counter); + return ACCESS_ONCE((v)->counter); } /* exported interface */ @@ -115,16 +98,43 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) return c; } +#define ATOMIC_OP(op, c_op) \ +static __inline__ void atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + \ + _atomic_spin_lock_irqsave(v, flags); \ + v->counter c_op i; \ + _atomic_spin_unlock_irqrestore(v, flags); \ +} \ + +#define ATOMIC_OP_RETURN(op, c_op) \ +static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int ret; \ + \ + _atomic_spin_lock_irqsave(v, flags); \ + ret = (v->counter c_op i); \ + _atomic_spin_unlock_irqrestore(v, flags); \ + \ + return ret; \ +} + +#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) + +ATOMIC_OPS(add, +=) +ATOMIC_OPS(sub, -=) + +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP -#define atomic_add(i,v) ((void)(__atomic_add_return( (i),(v)))) -#define atomic_sub(i,v) ((void)(__atomic_add_return(-((int) (i)),(v)))) -#define atomic_inc(v) ((void)(__atomic_add_return( 1,(v)))) -#define atomic_dec(v) ((void)(__atomic_add_return( -1,(v)))) +#define atomic_inc(v) (atomic_add( 1,(v))) +#define atomic_dec(v) (atomic_add( -1,(v))) -#define atomic_add_return(i,v) (__atomic_add_return( (i),(v))) -#define atomic_sub_return(i,v) (__atomic_add_return(-(i),(v))) -#define atomic_inc_return(v) (__atomic_add_return( 1,(v))) -#define atomic_dec_return(v) (__atomic_add_return( -1,(v))) +#define atomic_inc_return(v) (atomic_add_return( 1,(v))) +#define atomic_dec_return(v) (atomic_add_return( -1,(v))) #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) @@ -148,18 +158,37 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) #define ATOMIC64_INIT(i) { (i) } -static __inline__ s64 -__atomic64_add_return(s64 i, atomic64_t *v) -{ - s64 ret; - unsigned long flags; - _atomic_spin_lock_irqsave(v, flags); +#define ATOMIC64_OP(op, c_op) \ +static __inline__ void atomic64_##op(s64 i, atomic64_t *v) \ +{ \ + unsigned long flags; \ + \ + _atomic_spin_lock_irqsave(v, flags); \ + v->counter c_op i; \ + _atomic_spin_unlock_irqrestore(v, flags); \ +} \ + +#define ATOMIC64_OP_RETURN(op, c_op) \ +static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v) \ +{ \ + unsigned long flags; \ + s64 ret; \ + \ + _atomic_spin_lock_irqsave(v, flags); \ + ret = (v->counter c_op i); \ + _atomic_spin_unlock_irqrestore(v, flags); \ + \ + return ret; \ +} - ret = (v->counter += i); +#define ATOMIC64_OPS(op, c_op) ATOMIC64_OP(op, c_op) ATOMIC64_OP_RETURN(op, c_op) - _atomic_spin_unlock_irqrestore(v, flags); - return ret; -} +ATOMIC64_OPS(add, +=) +ATOMIC64_OPS(sub, -=) + +#undef ATOMIC64_OPS +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP static __inline__ void atomic64_set(atomic64_t *v, s64 i) @@ -175,18 +204,14 @@ atomic64_set(atomic64_t *v, s64 i) static __inline__ s64 atomic64_read(const atomic64_t *v) { - return (*(volatile long *)&(v)->counter); + return ACCESS_ONCE((v)->counter); } -#define atomic64_add(i,v) ((void)(__atomic64_add_return( ((s64)(i)),(v)))) -#define atomic64_sub(i,v) ((void)(__atomic64_add_return(-((s64)(i)),(v)))) -#define atomic64_inc(v) ((void)(__atomic64_add_return( 1,(v)))) -#define atomic64_dec(v) ((void)(__atomic64_add_return( -1,(v)))) +#define atomic64_inc(v) (atomic64_add( 1,(v))) +#define atomic64_dec(v) (atomic64_add( -1,(v))) -#define atomic64_add_return(i,v) (__atomic64_add_return( ((s64)(i)),(v))) -#define atomic64_sub_return(i,v) (__atomic64_add_return(-((s64)(i)),(v))) -#define atomic64_inc_return(v) (__atomic64_add_return( 1,(v))) -#define atomic64_dec_return(v) (__atomic64_add_return( -1,(v))) +#define atomic64_inc_return(v) (atomic64_add_return( 1,(v))) +#define atomic64_dec_return(v) (atomic64_add_return( -1,(v))) #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) diff --git a/arch/parisc/include/uapi/asm/signal.h b/arch/parisc/include/uapi/asm/signal.h index f5645d6a89f2..10df7079f4cd 100644 --- a/arch/parisc/include/uapi/asm/signal.h +++ b/arch/parisc/include/uapi/asm/signal.h @@ -8,12 +8,12 @@ #define SIGTRAP 5 #define SIGABRT 6 #define SIGIOT 6 -#define SIGEMT 7 +#define SIGSTKFLT 7 #define SIGFPE 8 #define SIGKILL 9 #define SIGBUS 10 #define SIGSEGV 11 -#define SIGSYS 12 /* Linux doesn't use this */ +#define SIGXCPU 12 #define SIGPIPE 13 #define SIGALRM 14 #define SIGTERM 15 @@ -32,16 +32,12 @@ #define SIGTTIN 27 #define SIGTTOU 28 #define SIGURG 29 -#define SIGLOST 30 /* Linux doesn't use this either */ -#define SIGUNUSED 31 -#define SIGRESERVE SIGUNUSED - -#define SIGXCPU 33 -#define SIGXFSZ 34 -#define SIGSTKFLT 36 +#define SIGXFSZ 30 +#define SIGUNUSED 31 +#define SIGSYS 31 /* Linux doesn't use this */ /* These should not be considered constants from userland. */ -#define SIGRTMIN 37 +#define SIGRTMIN 32 #define SIGRTMAX _NSIG /* it's 44 under HP/UX */ /* diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 4bc7b62fb4b6..88eace4e28c3 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -147,6 +147,7 @@ config PPC select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW + select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN @@ -182,7 +183,7 @@ config SCHED_OMIT_FRAME_POINTER config ARCH_MAY_HAVE_PC_FDC bool - default !PPC_PSERIES || PCI + default PCI config PPC_OF def_bool y @@ -287,6 +288,10 @@ config PPC_EMULATE_SSTEP bool default y if KPROBES || UPROBES || XMON || HAVE_HW_BREAKPOINT +config ZONE_DMA32 + bool + default y if PPC64 + source "init/Kconfig" source "kernel/Kconfig.freezer" @@ -603,6 +608,10 @@ config PPC_SUBPAGE_PROT to set access permissions (read/write, readonly, or no access) on the 4k subpages of each 64k page. +config PPC_COPRO_BASE + bool + default n + config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" depends on PPC64 && SMP diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5687e299d0a5..132d9c681d6a 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -135,6 +135,7 @@ CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4) CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5) CFLAGS-$(CONFIG_POWER6_CPU) += $(call cc-option,-mcpu=power6) CFLAGS-$(CONFIG_POWER7_CPU) += $(call cc-option,-mcpu=power7) +CFLAGS-$(CONFIG_POWER8_CPU) += $(call cc-option,-mcpu=power8) # Altivec option not allowed with e500mc64 in GCC. ifeq ($(CONFIG_ALTIVEC),y) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index ccc25eddbcb8..8a5bc1cfc6aa 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -389,7 +389,12 @@ $(obj)/zImage: $(addprefix $(obj)/, $(image-y)) $(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y)) @rm -f $@; ln $< $@ +# Only install the vmlinux install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) + sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" + +# Install the vmlinux and other built boot targets. +zInstall: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $^ # anything not in $(targets) diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi index 97479f0ce630..aecee9690a88 100644 --- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi @@ -410,7 +410,7 @@ /include/ "qoriq-gpio-3.dtsi" /include/ "qoriq-usb2-mph-0.dtsi" usb0: usb@210000 { - compatible = "fsl-usb2-mph-v2.4", "fsl-usb2-mph"; + compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph"; fsl,iommu-parent = <&pamu1>; fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */ phy_type = "utmi"; @@ -418,7 +418,7 @@ }; /include/ "qoriq-usb2-dr-0.dtsi" usb1: usb@211000 { - compatible = "fsl-usb2-dr-v2.4", "fsl-usb2-dr"; + compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr"; fsl,iommu-parent = <&pamu1>; fsl,liodn-reg = <&guts 0x524>; /* USB1LIODNR */ dr_mode = "host"; diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi index a3d582e0361a..7e2fc7cdce48 100644 --- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi @@ -498,13 +498,13 @@ /include/ "qoriq-gpio-3.dtsi" /include/ "qoriq-usb2-mph-0.dtsi" usb0: usb@210000 { - compatible = "fsl-usb2-mph-v2.4", "fsl-usb2-mph"; + compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph"; phy_type = "utmi"; port0; }; /include/ "qoriq-usb2-dr-0.dtsi" usb1: usb@211000 { - compatible = "fsl-usb2-dr-v2.4", "fsl-usb2-dr"; + compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr"; dr_mode = "host"; phy_type = "utmi"; }; diff --git a/arch/powerpc/boot/dts/t1040rdb.dts b/arch/powerpc/boot/dts/t1040rdb.dts new file mode 100644 index 000000000000..79a0bed04c1a --- /dev/null +++ b/arch/powerpc/boot/dts/t1040rdb.dts @@ -0,0 +1,48 @@ +/* + * T1040RDB Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t104xsi-pre.dtsi" +/include/ "t104xrdb.dtsi" + +/ { + model = "fsl,T1040RDB"; + compatible = "fsl,T1040RDB"; + ifc: localbus@ffe124000 { + cpld@3,0 { + compatible = "fsl,t1040rdb-cpld"; + }; + }; +}; + +/include/ "fsl/t1040si-post.dtsi" diff --git a/arch/powerpc/boot/dts/t1042rdb.dts b/arch/powerpc/boot/dts/t1042rdb.dts new file mode 100644 index 000000000000..738c23790e94 --- /dev/null +++ b/arch/powerpc/boot/dts/t1042rdb.dts @@ -0,0 +1,48 @@ +/* + * T1042RDB Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t104xsi-pre.dtsi" +/include/ "t104xrdb.dtsi" + +/ { + model = "fsl,T1042RDB"; + compatible = "fsl,T1042RDB"; + ifc: localbus@ffe124000 { + cpld@3,0 { + compatible = "fsl,t1042rdb-cpld"; + }; + }; +}; + +/include/ "fsl/t1042si-post.dtsi" diff --git a/arch/powerpc/boot/dts/t1042rdb_pi.dts b/arch/powerpc/boot/dts/t1042rdb_pi.dts new file mode 100644 index 000000000000..634f751fa6d3 --- /dev/null +++ b/arch/powerpc/boot/dts/t1042rdb_pi.dts @@ -0,0 +1,57 @@ +/* + * T1042RDB_PI Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t104xsi-pre.dtsi" +/include/ "t104xrdb.dtsi" + +/ { + model = "fsl,T1042RDB_PI"; + compatible = "fsl,T1042RDB_PI"; + ifc: localbus@ffe124000 { + cpld@3,0 { + compatible = "fsl,t1042rdb_pi-cpld"; + }; + }; + soc: soc@ffe000000 { + i2c@118000 { + rtc@68 { + compatible = "dallas,ds1337"; + reg = <0x68>; + interrupts = <0x2 0x1 0 0>; + }; + }; + }; +}; + +/include/ "fsl/t1042si-post.dtsi" diff --git a/arch/powerpc/boot/dts/t104xrdb.dtsi b/arch/powerpc/boot/dts/t104xrdb.dtsi new file mode 100644 index 000000000000..1cf0f3c5f7e5 --- /dev/null +++ b/arch/powerpc/boot/dts/t104xrdb.dtsi @@ -0,0 +1,156 @@ +/* + * T1040RDB/T1042RDB Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/ { + + ifc: localbus@ffe124000 { + reg = <0xf 0xfe124000 0 0x2000>; + ranges = <0 0 0xf 0xe8000000 0x08000000 + 2 0 0xf 0xff800000 0x00010000 + 3 0 0xf 0xffdf0000 0x00008000>; + + nor@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x8000000>; + bank-width = <2>; + device-width = <1>; + }; + + nand@2,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,ifc-nand"; + reg = <0x2 0x0 0x10000>; + }; + + cpld@3,0 { + reg = <3 0 0x300>; + }; + }; + + memory { + device_type = "memory"; + }; + + dcsr: dcsr@f00000000 { + ranges = <0x00000000 0xf 0x00000000 0x01072000>; + }; + + soc: soc@ffe000000 { + ranges = <0x00000000 0xf 0xfe000000 0x1000000>; + reg = <0xf 0xfe000000 0 0x00001000>; + + spi@110000 { + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "micron,n25q512a"; + reg = <0>; + spi-max-frequency = <10000000>; /* input clock */ + }; + }; + + i2c@118100 { + pca9546@77 { + compatible = "nxp,pca9546"; + reg = <0x77>; + #address-cells = <1>; + #size-cells = <0>; + }; + }; + + }; + + pci0: pcie@ffe240000 { + reg = <0xf 0xfe240000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci1: pcie@ffe250000 { + reg = <0xf 0xfe250000 0 0x10000>; + ranges = <0x02000000 0x0 0xe0000000 0xc 0x10000000 0x0 0x10000000 + 0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci2: pcie@ffe260000 { + reg = <0xf 0xfe260000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci3: pcie@ffe270000 { + reg = <0xf 0xfe270000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; +}; diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 45fd06cdc3e8..7a7b3c879f96 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -18,6 +18,7 @@ CONFIG_OPROFILE=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_PS3=y diff --git a/arch/powerpc/configs/celleb_defconfig b/arch/powerpc/configs/celleb_defconfig index 77d7bf3ca2ac..acccbfde8a50 100644 --- a/arch/powerpc/configs/celleb_defconfig +++ b/arch/powerpc/configs/celleb_defconfig @@ -15,6 +15,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_CELLEB=y diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig index 6a3c58adf253..688e9e4d29a1 100644 --- a/arch/powerpc/configs/corenet32_smp_defconfig +++ b/arch/powerpc/configs/corenet32_smp_defconfig @@ -165,6 +165,8 @@ CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=m CONFIG_MAGIC_SYSRQ=y diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index 269d6e47c67d..6db97e4414b2 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -50,7 +50,6 @@ CONFIG_NET_IPIP=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y -CONFIG_ARPD=y CONFIG_INET_ESP=y # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_LRO is not set @@ -60,33 +59,17 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_MTD=y -CONFIG_MTD_OF_PARTS=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y -CONFIG_MTD_BLKDEVS=y CONFIG_MTD_BLOCK=y CONFIG_FTL=y CONFIG_MTD_CFI=y -CONFIG_MTD_GEN_PROBE=y -CONFIG_MTD_MAP_BANK_WIDTH_1=y -CONFIG_MTD_MAP_BANK_WIDTH_2=y -CONFIG_MTD_MAP_BANK_WIDTH_4=y -CONFIG_MTD_CFI_I1=y -CONFIG_MTD_CFI_I2=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_M25P80=y -CONFIG_MTD_CFI_UTIL=y -CONFIG_MTD_NAND_ECC=y CONFIG_MTD_NAND=y -CONFIG_MTD_NAND_IDS=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_IFC=y CONFIG_MTD_UBI=y -CONFIG_MTD_UBI_WL_THRESHOLD=4096 -CONFIG_MTD_UBI_BEB_RESERVE=1 -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 @@ -102,6 +85,7 @@ CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set CONFIG_SERIO_LIBPS2=y +CONFIG_PPC_EPAPR_HV_BYTECHAN=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_MANY_PORTS=y @@ -115,7 +99,6 @@ CONFIG_SPI_GPIO=y CONFIG_SPI_FSL_SPI=y CONFIG_SPI_FSL_ESPI=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_USB_HID=m CONFIG_USB=y CONFIG_USB_MON=y @@ -124,14 +107,17 @@ CONFIG_USB_EHCI_FSL=y CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y +CONFIG_EDAC=y +CONFIG_EDAC_MM_EDAC=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_RTC_DRV_DS1374=y CONFIG_RTC_DRV_DS3232=y -CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y +CONFIG_VIRT_DRIVERS=y +CONFIG_FSL_HV_MANAGER=y +CONFIG_FSL_CORENET_CF=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_ISO9660_FS=m @@ -144,35 +130,24 @@ CONFIG_NTFS_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y -CONFIG_MISC_FILESYSTEMS=y CONFIG_JFFS2_FS=y CONFIG_JFFS2_FS_DEBUG=1 -CONFIG_JFFS2_FS_WRITEBUFFER=y -CONFIG_JFFS2_ZLIB=y -CONFIG_JFFS2_RTIME=y CONFIG_UBIFS_FS=y -CONFIG_UBIFS_FS_XATTR=y -CONFIG_UBIFS_FS_LZO=y -CONFIG_UBIFS_FS_ZLIB=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=m CONFIG_CRC_T10DIF=y -CONFIG_CRC16=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_LZO_COMPRESS=y -CONFIG_LZO_DECOMPRESS=y -CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRYPTO_LZO=y +CONFIG_DEBUG_INFO=y CONFIG_FRAME_WARN=1024 -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=y @@ -180,4 +155,3 @@ CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DEV_FSL_CAAM=y -CONFIG_FSL_CORENET_CF=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 7594c5ac6481..6fab06f7f411 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -16,6 +16,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index c8b6a9ddb21b..fbd9e4163311 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -16,6 +16,7 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y # CONFIG_BLK_DEV_BSG is not set +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_MAPLE=y diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig index fa1bfd37f1ec..d2c415489f72 100644 --- a/arch/powerpc/configs/mpc85xx_defconfig +++ b/arch/powerpc/configs/mpc85xx_defconfig @@ -213,7 +213,6 @@ CONFIG_RTC_DRV_DS1307=y CONFIG_RTC_DRV_DS1374=y CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_CMOS=y -CONFIG_RTC_DRV_DS1307=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y # CONFIG_NET_DMA is not set @@ -227,6 +226,9 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_ISO8859_1=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index 0b452ebd8b3d..87460083dbc7 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ b/arch/powerpc/configs/mpc85xx_smp_defconfig @@ -214,7 +214,6 @@ CONFIG_RTC_DRV_DS1307=y CONFIG_RTC_DRV_DS1374=y CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_CMOS=y -CONFIG_RTC_DRV_DS1307=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y # CONFIG_NET_DMA is not set @@ -228,6 +227,9 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_ISO8859_1=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/powerpc/configs/mpc86xx_defconfig b/arch/powerpc/configs/mpc86xx_defconfig index 35595ea74ff4..fc58aa8a89e4 100644 --- a/arch/powerpc/configs/mpc86xx_defconfig +++ b/arch/powerpc/configs/mpc86xx_defconfig @@ -145,6 +145,9 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_ISO8859_1=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_ADFS_FS=m diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index e5e7838af008..3e72c8c06a0d 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -14,6 +14,7 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_MAC_PARTITION=y +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_PASEMI=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 36518870e6b2..20bc5e2d368d 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -50,6 +50,7 @@ CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m CONFIG_PPC_TRANSACTIONAL_MEM=y CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_SCHED_SMT=y diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 28992d012926..512d2782b043 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -26,76 +26,53 @@ static __inline__ void atomic_set(atomic_t *v, int i) __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i)); } -static __inline__ void atomic_add(int a, atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%3 # atomic_add\n\ - add %0,%2,%0\n" - PPC405_ERR77(0,%3) -" stwcx. %0,0,%3 \n\ - bne- 1b" - : "=&r" (t), "+m" (v->counter) - : "r" (a), "r" (&v->counter) - : "cc"); +#define ATOMIC_OP(op, asm_op) \ +static __inline__ void atomic_##op(int a, atomic_t *v) \ +{ \ + int t; \ + \ + __asm__ __volatile__( \ +"1: lwarx %0,0,%3 # atomic_" #op "\n" \ + #asm_op " %0,%2,%0\n" \ + PPC405_ERR77(0,%3) \ +" stwcx. %0,0,%3 \n" \ +" bne- 1b\n" \ + : "=&r" (t), "+m" (v->counter) \ + : "r" (a), "r" (&v->counter) \ + : "cc"); \ +} \ + +#define ATOMIC_OP_RETURN(op, asm_op) \ +static __inline__ int atomic_##op##_return(int a, atomic_t *v) \ +{ \ + int t; \ + \ + __asm__ __volatile__( \ + PPC_ATOMIC_ENTRY_BARRIER \ +"1: lwarx %0,0,%2 # atomic_" #op "_return\n" \ + #asm_op " %0,%1,%0\n" \ + PPC405_ERR77(0,%2) \ +" stwcx. %0,0,%2 \n" \ +" bne- 1b\n" \ + PPC_ATOMIC_EXIT_BARRIER \ + : "=&r" (t) \ + : "r" (a), "r" (&v->counter) \ + : "cc", "memory"); \ + \ + return t; \ } -static __inline__ int atomic_add_return(int a, atomic_t *v) -{ - int t; +#define ATOMIC_OPS(op, asm_op) ATOMIC_OP(op, asm_op) ATOMIC_OP_RETURN(op, asm_op) - __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER -"1: lwarx %0,0,%2 # atomic_add_return\n\ - add %0,%1,%0\n" - PPC405_ERR77(0,%2) -" stwcx. %0,0,%2 \n\ - bne- 1b" - PPC_ATOMIC_EXIT_BARRIER - : "=&r" (t) - : "r" (a), "r" (&v->counter) - : "cc", "memory"); +ATOMIC_OPS(add, add) +ATOMIC_OPS(sub, subf) - return t; -} +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) -static __inline__ void atomic_sub(int a, atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%3 # atomic_sub\n\ - subf %0,%2,%0\n" - PPC405_ERR77(0,%3) -" stwcx. %0,0,%3 \n\ - bne- 1b" - : "=&r" (t), "+m" (v->counter) - : "r" (a), "r" (&v->counter) - : "cc"); -} - -static __inline__ int atomic_sub_return(int a, atomic_t *v) -{ - int t; - - __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER -"1: lwarx %0,0,%2 # atomic_sub_return\n\ - subf %0,%1,%0\n" - PPC405_ERR77(0,%2) -" stwcx. %0,0,%2 \n\ - bne- 1b" - PPC_ATOMIC_EXIT_BARRIER - : "=&r" (t) - : "r" (a), "r" (&v->counter) - : "cc", "memory"); - - return t; -} - static __inline__ void atomic_inc(atomic_t *v) { int t; @@ -289,71 +266,50 @@ static __inline__ void atomic64_set(atomic64_t *v, long i) __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i)); } -static __inline__ void atomic64_add(long a, atomic64_t *v) -{ - long t; - - __asm__ __volatile__( -"1: ldarx %0,0,%3 # atomic64_add\n\ - add %0,%2,%0\n\ - stdcx. %0,0,%3 \n\ - bne- 1b" - : "=&r" (t), "+m" (v->counter) - : "r" (a), "r" (&v->counter) - : "cc"); +#define ATOMIC64_OP(op, asm_op) \ +static __inline__ void atomic64_##op(long a, atomic64_t *v) \ +{ \ + long t; \ + \ + __asm__ __volatile__( \ +"1: ldarx %0,0,%3 # atomic64_" #op "\n" \ + #asm_op " %0,%2,%0\n" \ +" stdcx. %0,0,%3 \n" \ +" bne- 1b\n" \ + : "=&r" (t), "+m" (v->counter) \ + : "r" (a), "r" (&v->counter) \ + : "cc"); \ } -static __inline__ long atomic64_add_return(long a, atomic64_t *v) -{ - long t; - - __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER -"1: ldarx %0,0,%2 # atomic64_add_return\n\ - add %0,%1,%0\n\ - stdcx. %0,0,%2 \n\ - bne- 1b" - PPC_ATOMIC_EXIT_BARRIER - : "=&r" (t) - : "r" (a), "r" (&v->counter) - : "cc", "memory"); - - return t; +#define ATOMIC64_OP_RETURN(op, asm_op) \ +static __inline__ long atomic64_##op##_return(long a, atomic64_t *v) \ +{ \ + long t; \ + \ + __asm__ __volatile__( \ + PPC_ATOMIC_ENTRY_BARRIER \ +"1: ldarx %0,0,%2 # atomic64_" #op "_return\n" \ + #asm_op " %0,%1,%0\n" \ +" stdcx. %0,0,%2 \n" \ +" bne- 1b\n" \ + PPC_ATOMIC_EXIT_BARRIER \ + : "=&r" (t) \ + : "r" (a), "r" (&v->counter) \ + : "cc", "memory"); \ + \ + return t; \ } -#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) - -static __inline__ void atomic64_sub(long a, atomic64_t *v) -{ - long t; - - __asm__ __volatile__( -"1: ldarx %0,0,%3 # atomic64_sub\n\ - subf %0,%2,%0\n\ - stdcx. %0,0,%3 \n\ - bne- 1b" - : "=&r" (t), "+m" (v->counter) - : "r" (a), "r" (&v->counter) - : "cc"); -} +#define ATOMIC64_OPS(op, asm_op) ATOMIC64_OP(op, asm_op) ATOMIC64_OP_RETURN(op, asm_op) -static __inline__ long atomic64_sub_return(long a, atomic64_t *v) -{ - long t; +ATOMIC64_OPS(add, add) +ATOMIC64_OPS(sub, subf) - __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER -"1: ldarx %0,0,%2 # atomic64_sub_return\n\ - subf %0,%1,%0\n\ - stdcx. %0,0,%2 \n\ - bne- 1b" - PPC_ATOMIC_EXIT_BARRIER - : "=&r" (t) - : "r" (a), "r" (&v->counter) - : "cc", "memory"); +#undef ATOMIC64_OPS +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP - return t; -} +#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) static __inline__ void atomic64_inc(atomic64_t *v) { diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 3eb53d741070..3a39283333c3 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -133,7 +133,6 @@ extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); extern void bad_page_fault(struct pt_regs *, unsigned long, int); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void die(const char *, struct pt_regs *, long); -extern void print_backtrace(unsigned long *); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/copro.h b/arch/powerpc/include/asm/copro.h new file mode 100644 index 000000000000..ce216df31381 --- /dev/null +++ b/arch/powerpc/include/asm/copro.h @@ -0,0 +1,29 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_POWERPC_COPRO_H +#define _ASM_POWERPC_COPRO_H + +struct copro_slb +{ + u64 esid, vsid; +}; + +int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, + unsigned long dsisr, unsigned *flt); + +int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb); + + +#ifdef CONFIG_PPC_COPRO_BASE +void copro_flush_all_slbs(struct mm_struct *mm); +#else +static inline void copro_flush_all_slbs(struct mm_struct *mm) {} +#endif +#endif /* _ASM_POWERPC_COPRO_H */ diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 607559ab271f..6c840ceab820 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -32,6 +32,8 @@ static inline void setup_cputime_one_jiffy(void) { } typedef u64 __nocast cputime_t; typedef u64 __nocast cputime64_t; +#define cmpxchg_cputime(ptr, old, new) cmpxchg(ptr, old, new) + #ifdef __KERNEL__ /* diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 150866b2a3fe..894d538f3567 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -135,6 +135,7 @@ static inline int dma_supported(struct device *dev, u64 mask) extern int dma_set_mask(struct device *dev, u64 dma_mask); extern int __dma_set_mask(struct device *dev, u64 dma_mask); +extern u64 __dma_get_required_mask(struct device *dev); #define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 9983c3d26bca..3b260efbfbf9 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -146,6 +146,11 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev) return edev ? edev->pdev : NULL; } +static inline struct eeh_pe *eeh_dev_to_pe(struct eeh_dev* edev) +{ + return edev ? edev->pe : NULL; +} + /* Return values from eeh_ops::next_error */ enum { EEH_NEXT_ERR_NONE = 0, @@ -167,6 +172,7 @@ enum { #define EEH_OPT_ENABLE 1 /* EEH enable */ #define EEH_OPT_THAW_MMIO 2 /* MMIO enable */ #define EEH_OPT_THAW_DMA 3 /* DMA enable */ +#define EEH_OPT_FREEZE_PE 4 /* Freeze PE */ #define EEH_STATE_UNAVAILABLE (1 << 0) /* State unavailable */ #define EEH_STATE_NOT_SUPPORT (1 << 1) /* EEH not supported */ #define EEH_STATE_RESET_ACTIVE (1 << 2) /* Active reset */ @@ -198,6 +204,8 @@ struct eeh_ops { int (*wait_state)(struct eeh_pe *pe, int max_wait); int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len); int (*configure_bridge)(struct eeh_pe *pe); + int (*err_inject)(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask); int (*read_config)(struct device_node *dn, int where, int size, u32 *val); int (*write_config)(struct device_node *dn, int where, int size, u32 val); int (*next_error)(struct eeh_pe **pe); @@ -269,8 +277,7 @@ void eeh_dev_phb_init_dynamic(struct pci_controller *phb); int eeh_init(void); int __init eeh_ops_register(struct eeh_ops *ops); int __exit eeh_ops_unregister(const char *name); -unsigned long eeh_check_failure(const volatile void __iomem *token, - unsigned long val); +int eeh_check_failure(const volatile void __iomem *token); int eeh_dev_check_failure(struct eeh_dev *edev); void eeh_addr_cache_build(void); void eeh_add_device_early(struct device_node *); @@ -279,6 +286,8 @@ void eeh_add_device_late(struct pci_dev *); void eeh_add_device_tree_late(struct pci_bus *); void eeh_add_sysfs_files(struct pci_bus *); void eeh_remove_device(struct pci_dev *); +int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state); +int eeh_pe_reset_and_recover(struct eeh_pe *pe); int eeh_dev_open(struct pci_dev *pdev); void eeh_dev_release(struct pci_dev *pdev); struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group); @@ -321,9 +330,9 @@ static inline void *eeh_dev_init(struct device_node *dn, void *data) static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { } -static inline unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) +static inline int eeh_check_failure(const volatile void __iomem *token) { - return val; + return 0; } #define eeh_dev_check_failure(x) (0) @@ -354,7 +363,7 @@ static inline u8 eeh_readb(const volatile void __iomem *addr) { u8 val = in_8(addr); if (EEH_POSSIBLE_ERROR(val, u8)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -362,7 +371,7 @@ static inline u16 eeh_readw(const volatile void __iomem *addr) { u16 val = in_le16(addr); if (EEH_POSSIBLE_ERROR(val, u16)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -370,7 +379,7 @@ static inline u32 eeh_readl(const volatile void __iomem *addr) { u32 val = in_le32(addr); if (EEH_POSSIBLE_ERROR(val, u32)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -378,7 +387,7 @@ static inline u64 eeh_readq(const volatile void __iomem *addr) { u64 val = in_le64(addr); if (EEH_POSSIBLE_ERROR(val, u64)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -386,7 +395,7 @@ static inline u16 eeh_readw_be(const volatile void __iomem *addr) { u16 val = in_be16(addr); if (EEH_POSSIBLE_ERROR(val, u16)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -394,7 +403,7 @@ static inline u32 eeh_readl_be(const volatile void __iomem *addr) { u32 val = in_be32(addr); if (EEH_POSSIBLE_ERROR(val, u32)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -402,7 +411,7 @@ static inline u64 eeh_readq_be(const volatile void __iomem *addr) { u64 val = in_be64(addr); if (EEH_POSSIBLE_ERROR(val, u64)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -416,7 +425,7 @@ static inline void eeh_memcpy_fromio(void *dest, const * were copied. Check all four bytes. */ if (n >= 4 && EEH_POSSIBLE_ERROR(*((u32 *)(dest + n - 4)), u32)) - eeh_check_failure(src, *((u32 *)(dest + n - 4))); + eeh_check_failure(src); } /* in-string eeh macros */ @@ -425,7 +434,7 @@ static inline void eeh_readsb(const volatile void __iomem *addr, void * buf, { _insb(addr, buf, ns); if (EEH_POSSIBLE_ERROR((*(((u8*)buf)+ns-1)), u8)) - eeh_check_failure(addr, *(u8*)buf); + eeh_check_failure(addr); } static inline void eeh_readsw(const volatile void __iomem *addr, void * buf, @@ -433,7 +442,7 @@ static inline void eeh_readsw(const volatile void __iomem *addr, void * buf, { _insw(addr, buf, ns); if (EEH_POSSIBLE_ERROR((*(((u16*)buf)+ns-1)), u16)) - eeh_check_failure(addr, *(u16*)buf); + eeh_check_failure(addr); } static inline void eeh_readsl(const volatile void __iomem *addr, void * buf, @@ -441,7 +450,7 @@ static inline void eeh_readsl(const volatile void __iomem *addr, void * buf, { _insl(addr, buf, nl); if (EEH_POSSIBLE_ERROR((*(((u32*)buf)+nl-1)), u32)) - eeh_check_failure(addr, *(u32*)buf); + eeh_check_failure(addr); } #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h index 5b0c98bd46ab..1cb39c96d155 100644 --- a/arch/powerpc/include/asm/hydra.h +++ b/arch/powerpc/include/asm/hydra.h @@ -95,7 +95,6 @@ extern volatile struct Hydra __iomem *Hydra; #define HYDRA_INT_SPARE 19 extern int hydra_init(void); -extern void macio_adb_init(void); #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 41f13cec8a8f..e8e3a0a04eb0 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -31,11 +31,6 @@ extern atomic_t ppc_n_lost_interrupts; extern irq_hw_number_t virq_to_hw(unsigned int virq); -/** - * irq_early_init - Init irq remapping subsystem - */ -extern void irq_early_init(void); - static __inline__ int irq_canonicalize(int irq) { return irq; diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 16d7e33d35e9..19c36cba37c4 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -81,7 +81,6 @@ extern void default_machine_crash_shutdown(struct pt_regs *regs); extern int crash_shutdown_register(crash_shutdown_t handler); extern int crash_shutdown_unregister(crash_shutdown_t handler); -extern void machine_kexec_simple(struct kimage *image); extern void crash_kexec_secondary(struct pt_regs *regs); extern int overlaps_crashkernel(unsigned long start, unsigned long size); extern void reserve_crashkernel(void); diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 3af721633618..307347f8ddbd 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -328,8 +328,6 @@ extern struct machdep_calls *machine_id; extern void probe_machine(void); -extern char cmd_line[COMMAND_LINE_SIZE]; - #ifdef CONFIG_PPC_PMAC /* * Power macintoshes have either a CUDA, PMU or SMU controlling diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index d76514487d6f..aeebc94b2bce 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -190,6 +190,13 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) #ifndef __ASSEMBLY__ +static inline int slb_vsid_shift(int ssize) +{ + if (ssize == MMU_SEGSIZE_256M) + return SLB_VSID_SHIFT; + return SLB_VSID_SHIFT_1T; +} + static inline int segment_shift(int ssize) { if (ssize == MMU_SEGSIZE_256M) @@ -317,6 +324,7 @@ extern int __hash_page_64K(unsigned long ea, unsigned long access, unsigned int local, int ssize); struct mm_struct; unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap); +extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap); extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, int local, int ssize, @@ -342,6 +350,8 @@ extern void hash_failure_debug(unsigned long ea, unsigned long access, extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, unsigned long pstart, unsigned long prot, int psize, int ssize); +int htab_remove_mapping(unsigned long vstart, unsigned long vend, + int psize, int ssize); extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 86055e598269..9124b0ede1fc 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -135,6 +135,7 @@ struct opal_sg_list { #define OPAL_FLASH_MANAGE 77 #define OPAL_FLASH_UPDATE 78 #define OPAL_RESYNC_TIMEBASE 79 +#define OPAL_CHECK_TOKEN 80 #define OPAL_DUMP_INIT 81 #define OPAL_DUMP_INFO 82 #define OPAL_DUMP_READ 83 @@ -146,7 +147,9 @@ struct opal_sg_list { #define OPAL_GET_PARAM 89 #define OPAL_SET_PARAM 90 #define OPAL_DUMP_RESEND 91 +#define OPAL_PCI_SET_PHB_CXL_MODE 93 #define OPAL_DUMP_INFO2 94 +#define OPAL_PCI_ERR_INJECT 96 #define OPAL_PCI_EEH_FREEZE_SET 97 #define OPAL_HANDLE_HMI 98 #define OPAL_REGISTER_DUMP_REGION 101 @@ -199,6 +202,35 @@ enum OpalPciErrorSeverity { OPAL_EEH_SEV_INF = 5 }; +enum OpalErrinjectType { + OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR = 0, + OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64 = 1, +}; + +enum OpalErrinjectFunc { + /* IOA bus specific errors */ + OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR = 0, + OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_DATA = 1, + OPAL_ERR_INJECT_FUNC_IOA_LD_IO_ADDR = 2, + OPAL_ERR_INJECT_FUNC_IOA_LD_IO_DATA = 3, + OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR = 4, + OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_DATA = 5, + OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_ADDR = 6, + OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_DATA = 7, + OPAL_ERR_INJECT_FUNC_IOA_ST_IO_ADDR = 8, + OPAL_ERR_INJECT_FUNC_IOA_ST_IO_DATA = 9, + OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_ADDR = 10, + OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_DATA = 11, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_ADDR = 12, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_DATA = 13, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_MASTER = 14, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_TARGET = 15, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_ADDR = 16, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_DATA = 17, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_MASTER = 18, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET = 19, +}; + enum OpalShpcAction { OPAL_SHPC_GET_LINK_STATE = 0, OPAL_SHPC_GET_SLOT_STATE = 1 @@ -356,9 +388,12 @@ enum OpalM64EnableAction { }; enum OpalPciResetScope { - OPAL_PHB_COMPLETE = 1, OPAL_PCI_LINK = 2, OPAL_PHB_ERROR = 3, - OPAL_PCI_HOT_RESET = 4, OPAL_PCI_FUNDAMENTAL_RESET = 5, - OPAL_PCI_IODA_TABLE_RESET = 6, + OPAL_RESET_PHB_COMPLETE = 1, + OPAL_RESET_PCI_LINK = 2, + OPAL_RESET_PHB_ERROR = 3, + OPAL_RESET_PCI_HOT = 4, + OPAL_RESET_PCI_FUNDAMENTAL = 5, + OPAL_RESET_PCI_IODA_TABLE = 6 }; enum OpalPciReinitScope { @@ -819,6 +854,8 @@ int64_t opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number, uint64_t eeh_action_token); int64_t opal_pci_eeh_freeze_set(uint64_t phb_id, uint64_t pe_number, uint64_t eeh_action_token); +int64_t opal_pci_err_inject(uint64_t phb_id, uint32_t pe_no, uint32_t type, + uint32_t func, uint64_t addr, uint64_t mask); int64_t opal_pci_shpc(uint64_t phb_id, uint64_t shpc_action, uint8_t *state); @@ -887,6 +924,7 @@ int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe, __be16 *pci_error_type, __be16 *severity); int64_t opal_pci_poll(uint64_t phb_id); int64_t opal_return_cpu(void); +int64_t opal_check_token(uint64_t token); int64_t opal_reinit_cpus(uint64_t flags); int64_t opal_xscom_read(uint32_t gcid, uint64_t pcb_addr, __be64 *val); @@ -924,6 +962,7 @@ int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data); int64_t opal_handle_hmi(void); int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end); int64_t opal_unregister_dump_region(uint32_t id); +int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 88693cef4f3d..d908a46d05c0 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -42,20 +42,40 @@ typedef unsigned long pte_basic_t; -static __inline__ void clear_page(void *addr) +static inline void clear_page(void *addr) { - unsigned long lines, line_size; - - line_size = ppc64_caches.dline_size; - lines = ppc64_caches.dlines_per_page; - - __asm__ __volatile__( + unsigned long iterations; + unsigned long onex, twox, fourx, eightx; + + iterations = ppc64_caches.dlines_per_page / 8; + + /* + * Some verisions of gcc use multiply instructions to + * calculate the offsets so lets give it a hand to + * do better. + */ + onex = ppc64_caches.dline_size; + twox = onex << 1; + fourx = onex << 2; + eightx = onex << 3; + + asm volatile( "mtctr %1 # clear_page\n\ -1: dcbz 0,%0\n\ - add %0,%0,%3\n\ + .balign 16\n\ +1: dcbz 0,%0\n\ + dcbz %3,%0\n\ + dcbz %4,%0\n\ + dcbz %5,%0\n\ + dcbz %6,%0\n\ + dcbz %7,%0\n\ + dcbz %8,%0\n\ + dcbz %9,%0\n\ + add %0,%0,%10\n\ bdnz+ 1b" - : "=r" (addr) - : "r" (lines), "0" (addr), "r" (line_size) + : "=&r" (addr) + : "r" (iterations), "0" (addr), "b" (onex), "b" (twox), + "b" (twox+onex), "b" (fourx), "b" (fourx+onex), + "b" (twox+fourx), "b" (eightx-onex), "r" (eightx) : "ctr", "memory"); } @@ -104,7 +124,6 @@ extern unsigned long slice_get_unmapped_area(unsigned long addr, extern unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr); -extern void slice_init_context(struct mm_struct *mm, unsigned int psize); extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize); extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start, unsigned long len, unsigned int psize); diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 47edde8c3556..945e47adf7db 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -8,8 +8,6 @@ #include <linux/threads.h> #include <asm/io.h> /* For sub-arch specific PPC_PIN_SIZE */ -extern unsigned long va_to_phys(unsigned long address); -extern pte_t *va_to_pte(unsigned long address); extern unsigned long ioremap_bot; #ifdef CONFIG_44x @@ -50,10 +48,10 @@ extern int icache_44x_need_flush; #define FIRST_USER_ADDRESS 0 #define pte_ERROR(e) \ - printk("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ + pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ (unsigned long long)pte_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) /* * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h index 12798c9d4b4b..7b935683f268 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-4k.h @@ -64,7 +64,7 @@ (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) #define pud_ERROR(e) \ - printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) + pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) /* * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 7b3d54fae46f..ae153c40ab7c 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -328,11 +328,11 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) #define pte_ERROR(e) \ - printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) + pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) + pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) /* Encode and de-code a swap entry */ #define __swp_type(entry) (((entry).val >> 1) & 0x3f) diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index f60d4ea8b50c..316f9a5da173 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ #include <linux/mmdebug.h> +#include <linux/mmzone.h> #include <asm/processor.h> /* For TASK_SIZE */ #include <asm/mmu.h> #include <asm/page.h> @@ -248,6 +249,8 @@ extern unsigned long empty_zero_page[]; extern pgd_t swapper_pg_dir[]; +void limit_zone_pfn(enum zone_type zone, unsigned long max_pfn); +int dma_pfn_limit_to_zone(u64 pfn_limit); extern void paging_init(void); /* diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 12c32c5f533d..67859edbf8fd 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -273,7 +273,7 @@ static inline long plpar_set_mode(unsigned long mflags, unsigned long resource, static inline long enable_reloc_on_exceptions(void) { /* mflags = 3: Exceptions at 0xC000000000004000 */ - return plpar_set_mode(3, 3, 0, 0); + return plpar_set_mode(3, H_SET_MODE_RESOURCE_ADDR_TRANS_MODE, 0, 0); } /* @@ -284,7 +284,7 @@ static inline long enable_reloc_on_exceptions(void) * returns H_SUCCESS. */ static inline long disable_reloc_on_exceptions(void) { - return plpar_set_mode(0, 3, 0, 0); + return plpar_set_mode(0, H_SET_MODE_RESOURCE_ADDR_TRANS_MODE, 0, 0); } /* @@ -297,7 +297,7 @@ static inline long disable_reloc_on_exceptions(void) { static inline long enable_big_endian_exceptions(void) { /* mflags = 0: big endian exceptions */ - return plpar_set_mode(0, 4, 0, 0); + return plpar_set_mode(0, H_SET_MODE_RESOURCE_LE, 0, 0); } /* @@ -310,17 +310,17 @@ static inline long enable_big_endian_exceptions(void) static inline long enable_little_endian_exceptions(void) { /* mflags = 1: little endian exceptions */ - return plpar_set_mode(1, 4, 0, 0); + return plpar_set_mode(1, H_SET_MODE_RESOURCE_LE, 0, 0); } static inline long plapr_set_ciabr(unsigned long ciabr) { - return plpar_set_mode(0, 1, ciabr, 0); + return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_CIABR, ciabr, 0); } static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0) { - return plpar_set_mode(0, 2, dawr0, dawrx0); + return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0); } #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h new file mode 100644 index 000000000000..f09a22fa1bd7 --- /dev/null +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -0,0 +1,31 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_PNV_PCI_H +#define _ASM_PNV_PCI_H + +#include <linux/pci.h> +#include <misc/cxl.h> + +int pnv_phb_to_cxl(struct pci_dev *dev); +int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, + unsigned int virq); +int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num); +void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num); +int pnv_cxl_get_irq_count(struct pci_dev *dev); +struct device_node *pnv_pci_to_phb_node(struct pci_dev *dev); + +#ifdef CONFIG_CXL_BASE +int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev, int num); +void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev); +#endif + +#endif diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 74b79f07f041..7f436ba1b56f 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -76,8 +76,6 @@ void of_parse_dma_window(struct device_node *dn, const __be32 *dma_window, unsigned long *busno, unsigned long *phys, unsigned long *size); -extern void kdump_move_device_tree(void); - extern void of_instantiate_rtc(void); extern int of_get_ibm_chip_id(struct device_node *np); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 0c0505956a29..fe3f9488f321 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -947,7 +947,7 @@ * 32-bit 8xx: * - SPRG0 scratch for exception vectors * - SPRG1 scratch for exception vectors - * - SPRG2 apparently unused but initialized + * - SPRG2 scratch for exception vectors * */ #ifdef CONFIG_PPC64 @@ -1057,6 +1057,7 @@ #ifdef CONFIG_8xx #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1 +#define SPRN_SPRG_SCRATCH2 SPRN_SPRG2 #endif diff --git a/arch/powerpc/include/asm/rio.h b/arch/powerpc/include/asm/rio.h index b1d2deceeedb..ec800f28fec5 100644 --- a/arch/powerpc/include/asm/rio.h +++ b/arch/powerpc/include/asm/rio.h @@ -13,7 +13,6 @@ #ifndef ASM_PPC_RIO_H #define ASM_PPC_RIO_H -extern void platform_rio_init(void); #ifdef CONFIG_FSL_RIO extern int fsl_rio_mcheck_exception(struct pt_regs *); #else diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h index 37b7ca39ec9f..a6e6e2bf9d15 100644 --- a/arch/powerpc/include/asm/spu.h +++ b/arch/powerpc/include/asm/spu.h @@ -27,6 +27,8 @@ #include <linux/workqueue.h> #include <linux/device.h> #include <linux/mutex.h> +#include <asm/reg.h> +#include <asm/copro.h> #define LS_SIZE (256 * 1024) #define LS_ADDR_MASK (LS_SIZE - 1) @@ -277,9 +279,6 @@ void spu_remove_dev_attr(struct device_attribute *attr); int spu_add_dev_attr_group(struct attribute_group *attrs); void spu_remove_dev_attr_group(struct attribute_group *attrs); -int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, - unsigned long dsisr, unsigned *flt); - /* * Notifier blocks: * diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index f593b0f9b627..d3a42cc45a82 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -25,3 +25,65 @@ struct pt_regs; /* Emulate instructions that cause a transfer of control. */ extern int emulate_step(struct pt_regs *regs, unsigned int instr); + +enum instruction_type { + COMPUTE, /* arith/logical/CR op, etc. */ + LOAD, + LOAD_MULTI, + LOAD_FP, + LOAD_VMX, + LOAD_VSX, + STORE, + STORE_MULTI, + STORE_FP, + STORE_VMX, + STORE_VSX, + LARX, + STCX, + BRANCH, + MFSPR, + MTSPR, + CACHEOP, + BARRIER, + SYSCALL, + MFMSR, + MTMSR, + RFI, + INTERRUPT, + UNKNOWN +}; + +#define INSTR_TYPE_MASK 0x1f + +/* Load/store flags, ORed in with type */ +#define SIGNEXT 0x20 +#define UPDATE 0x40 /* matches bit in opcode 31 instructions */ +#define BYTEREV 0x80 + +/* Cacheop values, ORed in with type */ +#define CACHEOP_MASK 0x700 +#define DCBST 0 +#define DCBF 0x100 +#define DCBTST 0x200 +#define DCBT 0x300 +#define ICBI 0x400 + +/* Size field in type word */ +#define SIZE(n) ((n) << 8) +#define GETSIZE(w) ((w) >> 8) + +#define MKOP(t, f, s) ((t) | (f) | SIZE(s)) + +struct instruction_op { + int type; + int reg; + unsigned long val; + /* For LOAD/STORE/LARX/STCX */ + unsigned long ea; + int update_reg; + /* For MFSPR */ + int spr; +}; + +extern int analyse_instr(struct instruction_op *op, struct pt_regs *regs, + unsigned int instr); diff --git a/arch/powerpc/include/asm/tsi108.h b/arch/powerpc/include/asm/tsi108.h index f8b60793b7a9..d531d9e173ef 100644 --- a/arch/powerpc/include/asm/tsi108.h +++ b/arch/powerpc/include/asm/tsi108.h @@ -84,10 +84,6 @@ extern u32 tsi108_pci_cfg_base; /* Exported functions */ -extern int tsi108_bridge_init(struct pci_controller *hose, uint phys_csr_base); -extern unsigned long tsi108_get_mem_size(void); -extern unsigned long tsi108_get_cpu_clk(void); -extern unsigned long tsi108_get_sdc_clk(void); extern int tsi108_direct_write_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 val); extern int tsi108_direct_read_config(struct pci_bus *bus, unsigned int devfn, diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index b51fba10e733..78f2675f2aac 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -52,7 +52,6 @@ extern void __init udbg_init_44x_as1(void); extern void __init udbg_init_40x_realmode(void); extern void __init udbg_init_cpm(void); extern void __init udbg_init_usbgecko(void); -extern void __init udbg_init_wsp(void); extern void __init udbg_init_memcons(void); extern void __init udbg_init_ehv_bc(void); extern void __init udbg_init_ps3gelic(void); diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index 9a5c928bb3c6..5b3a903adae6 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -42,32 +42,65 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct #else +#ifdef CONFIG_64BIT + +/* unused */ struct word_at_a_time { - const unsigned long one_bits, high_bits; }; -#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } +#define WORD_AT_A_TIME_CONSTANTS { } -#ifdef CONFIG_64BIT +/* This will give us 0xff for a NULL char and 0x00 elsewhere */ +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) +{ + unsigned long ret; + unsigned long zero = 0; -/* Alan Modra's little-endian strlen tail for 64-bit */ -#define create_zero_mask(mask) (mask) + asm("cmpb %0,%1,%2" : "=r" (ret) : "r" (a), "r" (zero)); + *bits = ret; -static inline unsigned long find_zero(unsigned long mask) + return ret; +} + +static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c) +{ + return bits; +} + +/* Alan Modra's little-endian strlen tail for 64-bit */ +static inline unsigned long create_zero_mask(unsigned long bits) { unsigned long leading_zero_bits; long trailing_zero_bit_mask; - asm ("addi %1,%2,-1\n\t" - "andc %1,%1,%2\n\t" - "popcntd %0,%1" - : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask) - : "r" (mask)); - return leading_zero_bits >> 3; + asm("addi %1,%2,-1\n\t" + "andc %1,%1,%2\n\t" + "popcntd %0,%1" + : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask) + : "r" (bits)); + + return leading_zero_bits; +} + +static inline unsigned long find_zero(unsigned long mask) +{ + return mask >> 3; +} + +/* This assumes that we never ask for an all 1s bitmask */ +static inline unsigned long zero_bytemask(unsigned long mask) +{ + return (1UL << mask) - 1; } #else /* 32-bit case */ +struct word_at_a_time { + const unsigned long one_bits, high_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } + /* * This is largely generic for little-endian machines, but the * optimal byte mask counting is probably going to be something @@ -96,8 +129,6 @@ static inline unsigned long find_zero(unsigned long mask) return count_masked_bytes(mask); } -#endif - /* Return nonzero if it has a zero */ static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) { @@ -114,6 +145,59 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, /* The mask we created is directly usable as a bytemask */ #define zero_bytemask(mask) (mask) +#endif /* CONFIG_64BIT */ + +#endif /* __BIG_ENDIAN__ */ + +/* + * We use load_unaligned_zero() in a selftest, which builds a userspace + * program. Some linker scripts seem to discard the .fixup section, so allow + * the test code to use a different section name. + */ +#ifndef FIXUP_SECTION +#define FIXUP_SECTION ".fixup" +#endif + +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret, offset, tmp; + + asm( + "1: " PPC_LL "%[ret], 0(%[addr])\n" + "2:\n" + ".section " FIXUP_SECTION ",\"ax\"\n" + "3: " +#ifdef __powerpc64__ + "clrrdi %[tmp], %[addr], 3\n\t" + "clrlsldi %[offset], %[addr], 61, 3\n\t" + "ld %[ret], 0(%[tmp])\n\t" +#ifdef __BIG_ENDIAN__ + "sld %[ret], %[ret], %[offset]\n\t" +#else + "srd %[ret], %[ret], %[offset]\n\t" #endif +#else + "clrrwi %[tmp], %[addr], 2\n\t" + "clrlslwi %[offset], %[addr], 30, 3\n\t" + "lwz %[ret], 0(%[tmp])\n\t" +#ifdef __BIG_ENDIAN__ + "slw %[ret], %[ret], %[offset]\n\t" +#else + "srw %[ret], %[ret], %[offset]\n\t" +#endif +#endif + "b 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n\t" + PPC_LONG_ALIGN "\n\t" + PPC_LONG "1b,3b\n" + ".previous" + : [tmp] "=&b" (tmp), [offset] "=&r" (offset), [ret] "=&r" (ret) + : [addr] "b" (addr), "m" (*(unsigned long *)addr)); + + return ret; +} + +#undef FIXUP_SECTION #endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 282d43a0c855..0d050ea37a04 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -29,6 +29,7 @@ /* Native ICP */ #ifdef CONFIG_PPC_ICP_NATIVE extern int icp_native_init(void); +extern void icp_native_flush_interrupt(void); #else static inline int icp_native_init(void) { return -ENODEV; } #endif diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 670c312d914e..502cf69b6c89 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -93,6 +93,9 @@ obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_MODULES) += ppc_ksyms.o +ifeq ($(CONFIG_PPC32),y) +obj-$(CONFIG_MODULES) += ppc_ksyms_32.o +endif obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KPROBES) += kprobes.o diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 7a13f378ca2c..c78e6dac4d7d 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -13,6 +13,7 @@ #include <linux/crash_dump.h> #include <linux/bootmem.h> +#include <linux/io.h> #include <linux/memblock.h> #include <asm/code-patching.h> #include <asm/kdump.h> diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index bd1a2aba599f..735979764cd4 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -106,10 +106,14 @@ int __init swiotlb_setup_bus_notifier(void) return 0; } -void swiotlb_detect_4g(void) +void __init swiotlb_detect_4g(void) { - if ((memblock_end_of_DRAM() - 1) > 0xffffffff) + if ((memblock_end_of_DRAM() - 1) > 0xffffffff) { ppc_swiotlb_enable = 1; +#ifdef CONFIG_ZONE_DMA32 + limit_zone_pfn(ZONE_DMA32, (1ULL << 32) >> PAGE_SHIFT); +#endif + } } static int __init swiotlb_late_init(void) diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index ee78f6e49d64..adac9dc54aee 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -15,6 +15,7 @@ #include <asm/vio.h> #include <asm/bug.h> #include <asm/machdep.h> +#include <asm/swiotlb.h> /* * Generic direct DMA implementation @@ -25,6 +26,18 @@ * default the offset is PCI_DRAM_OFFSET. */ +static u64 __maybe_unused get_pfn_limit(struct device *dev) +{ + u64 pfn = (dev->coherent_dma_mask >> PAGE_SHIFT) + 1; + struct dev_archdata __maybe_unused *sd = &dev->archdata; + +#ifdef CONFIG_SWIOTLB + if (sd->max_direct_dma_addr && sd->dma_ops == &swiotlb_dma_ops) + pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT); +#endif + + return pfn; +} void *dma_direct_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, @@ -40,6 +53,26 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, #else struct page *page; int node = dev_to_node(dev); + u64 pfn = get_pfn_limit(dev); + int zone; + + zone = dma_pfn_limit_to_zone(pfn); + if (zone < 0) { + dev_err(dev, "%s: No suitable zone for pfn %#llx\n", + __func__, pfn); + return NULL; + } + + switch (zone) { + case ZONE_DMA: + flag |= GFP_DMA; + break; +#ifdef CONFIG_ZONE_DMA32 + case ZONE_DMA32: + flag |= GFP_DMA32; + break; +#endif + }; /* ignore region specifiers */ flag &= ~(__GFP_HIGHMEM); @@ -202,6 +235,7 @@ int __dma_set_mask(struct device *dev, u64 dma_mask) *dev->dma_mask = dma_mask; return 0; } + int dma_set_mask(struct device *dev, u64 dma_mask) { if (ppc_md.dma_set_mask) @@ -210,13 +244,10 @@ int dma_set_mask(struct device *dev, u64 dma_mask) } EXPORT_SYMBOL(dma_set_mask); -u64 dma_get_required_mask(struct device *dev) +u64 __dma_get_required_mask(struct device *dev) { struct dma_map_ops *dma_ops = get_dma_ops(dev); - if (ppc_md.dma_get_required_mask) - return ppc_md.dma_get_required_mask(dev); - if (unlikely(dma_ops == NULL)) return 0; @@ -225,6 +256,14 @@ u64 dma_get_required_mask(struct device *dev) return DMA_BIT_MASK(8 * sizeof(dma_addr_t)); } + +u64 dma_get_required_mask(struct device *dev) +{ + if (ppc_md.dma_get_required_mask) + return ppc_md.dma_get_required_mask(dev); + + return __dma_get_required_mask(dev); +} EXPORT_SYMBOL_GPL(dma_get_required_mask); static int __init dma_init(void) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 59a64f8dc85f..d543e4179c18 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -117,7 +117,7 @@ static DEFINE_MUTEX(eeh_dev_mutex); * not dynamically alloced, so that it ends up in RMO where RTAS * can access it. */ -#define EEH_PCI_REGS_LOG_LEN 4096 +#define EEH_PCI_REGS_LOG_LEN 8192 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN]; /* @@ -148,16 +148,12 @@ static int __init eeh_setup(char *str) } __setup("eeh=", eeh_setup); -/** - * eeh_gather_pci_data - Copy assorted PCI config space registers to buff - * @edev: device to report data for - * @buf: point to buffer in which to log - * @len: amount of room in buffer - * - * This routine captures assorted PCI configuration space data, - * and puts them into a buffer for RTAS error logging. +/* + * This routine captures assorted PCI configuration space data + * for the indicated PCI device, and puts them into a buffer + * for RTAS error logging. */ -static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len) +static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) { struct device_node *dn = eeh_dev_to_of_node(edev); u32 cfg; @@ -255,6 +251,19 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len) return n; } +static void *eeh_dump_pe_log(void *data, void *flag) +{ + struct eeh_pe *pe = data; + struct eeh_dev *edev, *tmp; + size_t *plen = flag; + + eeh_pe_for_each_dev(pe, edev, tmp) + *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen, + EEH_PCI_REGS_LOG_LEN - *plen); + + return NULL; +} + /** * eeh_slot_error_detail - Generate combined log including driver log and error log * @pe: EEH PE @@ -268,7 +277,6 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len) void eeh_slot_error_detail(struct eeh_pe *pe, int severity) { size_t loglen = 0; - struct eeh_dev *edev, *tmp; /* * When the PHB is fenced or dead, it's pointless to collect @@ -286,10 +294,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) eeh_pe_restore_bars(pe); pci_regs_buf[0] = 0; - eeh_pe_for_each_dev(pe, edev, tmp) { - loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen, - EEH_PCI_REGS_LOG_LEN - loglen); - } + eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); } eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); @@ -410,7 +415,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) } dn = eeh_dev_to_of_node(edev); dev = eeh_dev_to_pci_dev(edev); - pe = edev->pe; + pe = eeh_dev_to_pe(edev); /* Access to IO BARs might get this far and still not want checking. */ if (!pe) { @@ -542,17 +547,16 @@ EXPORT_SYMBOL_GPL(eeh_dev_check_failure); /** * eeh_check_failure - Check if all 1's data is due to EEH slot freeze - * @token: I/O token, should be address in the form 0xA.... - * @val: value, should be all 1's (XXX why do we need this arg??) + * @token: I/O address * - * Check for an EEH failure at the given token address. Call this + * Check for an EEH failure at the given I/O address. Call this * routine if the result of a read was all 0xff's and you want to - * find out if this is due to an EEH slot freeze event. This routine + * find out if this is due to an EEH slot freeze event. This routine * will query firmware for the EEH status. * * Note this routine is safe to call in an interrupt context. */ -unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) +int eeh_check_failure(const volatile void __iomem *token) { unsigned long addr; struct eeh_dev *edev; @@ -562,13 +566,11 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon edev = eeh_addr_cache_get_dev(addr); if (!edev) { eeh_stats.no_device++; - return val; + return 0; } - eeh_dev_check_failure(edev); - return val; + return eeh_dev_check_failure(edev); } - EXPORT_SYMBOL(eeh_check_failure); @@ -582,25 +584,51 @@ EXPORT_SYMBOL(eeh_check_failure); */ int eeh_pci_enable(struct eeh_pe *pe, int function) { - int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + int active_flag, rc; /* * pHyp doesn't allow to enable IO or DMA on unfrozen PE. * Also, it's pointless to enable them on unfrozen PE. So - * we have the check here. + * we have to check before enabling IO or DMA. */ - if (function == EEH_OPT_THAW_MMIO || - function == EEH_OPT_THAW_DMA) { + switch (function) { + case EEH_OPT_THAW_MMIO: + active_flag = EEH_STATE_MMIO_ACTIVE; + break; + case EEH_OPT_THAW_DMA: + active_flag = EEH_STATE_DMA_ACTIVE; + break; + case EEH_OPT_DISABLE: + case EEH_OPT_ENABLE: + case EEH_OPT_FREEZE_PE: + active_flag = 0; + break; + default: + pr_warn("%s: Invalid function %d\n", + __func__, function); + return -EINVAL; + } + + /* + * Check if IO or DMA has been enabled before + * enabling them. + */ + if (active_flag) { rc = eeh_ops->get_state(pe, NULL); if (rc < 0) return rc; - /* Needn't to enable or already enabled */ - if ((rc == EEH_STATE_NOT_SUPPORT) || - ((rc & flags) == flags)) + /* Needn't enable it at all */ + if (rc == EEH_STATE_NOT_SUPPORT) + return 0; + + /* It's already enabled */ + if (rc & active_flag) return 0; } + + /* Issue the request */ rc = eeh_ops->set_option(pe, function); if (rc) pr_warn("%s: Unexpected state change %d on " @@ -608,17 +636,17 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) __func__, function, pe->phb->global_number, pe->addr, rc); - rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if (rc <= 0) - return rc; + /* Check if the request is finished successfully */ + if (active_flag) { + rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); + if (rc <= 0) + return rc; - if ((function == EEH_OPT_THAW_MMIO) && - (rc & EEH_STATE_MMIO_ENABLED)) - return 0; + if (rc & active_flag) + return 0; - if ((function == EEH_OPT_THAW_DMA) && - (rc & EEH_STATE_DMA_ENABLED)) - return 0; + return -EIO; + } return rc; } @@ -634,7 +662,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) { struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - struct eeh_pe *pe = edev->pe; + struct eeh_pe *pe = eeh_dev_to_pe(edev); if (!pe) { pr_err("%s: No PE found on PCI device %s\n", @@ -645,14 +673,18 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat switch (state) { case pcie_deassert_reset: eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); + eeh_pe_state_clear(pe, EEH_PE_RESET); break; case pcie_hot_reset: + eeh_pe_state_mark(pe, EEH_PE_RESET); eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: + eeh_pe_state_mark(pe, EEH_PE_RESET); eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); break; default: + eeh_pe_state_clear(pe, EEH_PE_RESET); return -EINVAL; }; @@ -1141,6 +1173,85 @@ void eeh_remove_device(struct pci_dev *dev) edev->mode &= ~EEH_DEV_SYSFS; } +int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state) +{ + int ret; + + ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + if (ret) { + pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n", + __func__, ret, pe->phb->global_number, pe->addr); + return ret; + } + + ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); + if (ret) { + pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n", + __func__, ret, pe->phb->global_number, pe->addr); + return ret; + } + + /* Clear software isolated state */ + if (sw_state && (pe->state & EEH_PE_ISOLATED)) + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + + return ret; +} + + +static struct pci_device_id eeh_reset_ids[] = { + { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */ + { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */ + { 0 } +}; + +static int eeh_pe_change_owner(struct eeh_pe *pe) +{ + struct eeh_dev *edev, *tmp; + struct pci_dev *pdev; + struct pci_device_id *id; + int flags, ret; + + /* Check PE state */ + flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + ret = eeh_ops->get_state(pe, NULL); + if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT) + return 0; + + /* Unfrozen PE, nothing to do */ + if ((ret & flags) == flags) + return 0; + + /* Frozen PE, check if it needs PE level reset */ + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + continue; + + for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) { + if (id->vendor != PCI_ANY_ID && + id->vendor != pdev->vendor) + continue; + if (id->device != PCI_ANY_ID && + id->device != pdev->device) + continue; + if (id->subvendor != PCI_ANY_ID && + id->subvendor != pdev->subsystem_vendor) + continue; + if (id->subdevice != PCI_ANY_ID && + id->subdevice != pdev->subsystem_device) + continue; + + goto reset; + } + } + + return eeh_unfreeze_pe(pe, true); + +reset: + return eeh_pe_reset_and_recover(pe); +} + /** * eeh_dev_open - Increase count of pass through devices for PE * @pdev: PCI device @@ -1153,6 +1264,7 @@ void eeh_remove_device(struct pci_dev *dev) int eeh_dev_open(struct pci_dev *pdev) { struct eeh_dev *edev; + int ret = -ENODEV; mutex_lock(&eeh_dev_mutex); @@ -1165,6 +1277,16 @@ int eeh_dev_open(struct pci_dev *pdev) if (!edev || !edev->pe) goto out; + /* + * The PE might have been put into frozen state, but we + * didn't detect that yet. The passed through PCI devices + * in frozen PE won't work properly. Clear the frozen state + * in advance. + */ + ret = eeh_pe_change_owner(edev->pe); + if (ret) + goto out; + /* Increase PE's pass through count */ atomic_inc(&edev->pe->pass_dev_cnt); mutex_unlock(&eeh_dev_mutex); @@ -1172,7 +1294,7 @@ int eeh_dev_open(struct pci_dev *pdev) return 0; out: mutex_unlock(&eeh_dev_mutex); - return -ENODEV; + return ret; } EXPORT_SYMBOL_GPL(eeh_dev_open); @@ -1202,6 +1324,7 @@ void eeh_dev_release(struct pci_dev *pdev) /* Decrease PE's pass through count */ atomic_dec(&edev->pe->pass_dev_cnt); WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0); + eeh_pe_change_owner(edev->pe); out: mutex_unlock(&eeh_dev_mutex); } @@ -1281,8 +1404,10 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option) */ switch (option) { case EEH_OPT_ENABLE: - if (eeh_enabled()) + if (eeh_enabled()) { + ret = eeh_pe_change_owner(pe); break; + } ret = -EIO; break; case EEH_OPT_DISABLE: @@ -1294,7 +1419,7 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option) break; } - ret = eeh_ops->set_option(pe, option); + ret = eeh_pci_enable(pe, option); break; default: pr_debug("%s: Option %d out of range (%d, %d)\n", @@ -1345,6 +1470,36 @@ int eeh_pe_get_state(struct eeh_pe *pe) } EXPORT_SYMBOL_GPL(eeh_pe_get_state); +static int eeh_pe_reenable_devices(struct eeh_pe *pe) +{ + struct eeh_dev *edev, *tmp; + struct pci_dev *pdev; + int ret = 0; + + /* Restore config space */ + eeh_pe_restore_bars(pe); + + /* + * Reenable PCI devices as the devices passed + * through are always enabled before the reset. + */ + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + continue; + + ret = pci_reenable_device(pdev); + if (ret) { + pr_warn("%s: Failure %d reenabling %s\n", + __func__, ret, pci_name(pdev)); + return ret; + } + } + + /* The PE is still in frozen state */ + return eeh_unfreeze_pe(pe, true); +} + /** * eeh_pe_reset - Issue PE reset according to specified type * @pe: EEH PE @@ -1368,23 +1523,22 @@ int eeh_pe_reset(struct eeh_pe *pe, int option) switch (option) { case EEH_RESET_DEACTIVATE: ret = eeh_ops->reset(pe, option); + eeh_pe_state_clear(pe, EEH_PE_RESET); if (ret) break; - /* - * The PE is still in frozen state and we need to clear - * that. It's good to clear frozen state after deassert - * to avoid messy IO access during reset, which might - * cause recursive frozen PE. - */ - ret = eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO); - if (!ret) - ret = eeh_ops->set_option(pe, EEH_OPT_THAW_DMA); - if (!ret) - eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + ret = eeh_pe_reenable_devices(pe); break; case EEH_RESET_HOT: case EEH_RESET_FUNDAMENTAL: + /* + * Proactively freeze the PE to drop all MMIO access + * during reset, which should be banned as it's always + * cause recursive EEH error. + */ + eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); + + eeh_pe_state_mark(pe, EEH_PE_RESET); ret = eeh_ops->reset(pe, option); break; default: @@ -1413,9 +1567,6 @@ int eeh_pe_configure(struct eeh_pe *pe) if (!pe) return -ENODEV; - /* Restore config space for the affected devices */ - eeh_pe_restore_bars(pe); - return ret; } EXPORT_SYMBOL_GPL(eeh_pe_configure); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 6a0dcee8e931..3fd514f8e4b2 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -180,6 +180,22 @@ static bool eeh_dev_removed(struct eeh_dev *edev) return false; } +static void *eeh_dev_save_state(void *data, void *userdata) +{ + struct eeh_dev *edev = data; + struct pci_dev *pdev; + + if (!edev) + return NULL; + + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + return NULL; + + pci_save_state(pdev); + return NULL; +} + /** * eeh_report_error - Report pci error to each device driver * @data: eeh device @@ -303,6 +319,22 @@ static void *eeh_report_reset(void *data, void *userdata) return NULL; } +static void *eeh_dev_restore_state(void *data, void *userdata) +{ + struct eeh_dev *edev = data; + struct pci_dev *pdev; + + if (!edev) + return NULL; + + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + return NULL; + + pci_restore_state(pdev); + return NULL; +} + /** * eeh_report_resume - Tell device to resume normal operations * @data: eeh device @@ -450,38 +482,82 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) static void *__eeh_clear_pe_frozen_state(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; - int i, rc; + bool *clear_sw_state = flag; + int i, rc = 1; - for (i = 0; i < 3; i++) { - rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); - if (rc) - continue; - rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); - if (!rc) - break; - } + for (i = 0; rc && i < 3; i++) + rc = eeh_unfreeze_pe(pe, clear_sw_state); - /* The PE has been isolated, clear it */ + /* Stop immediately on any errors */ if (rc) { - pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n", - __func__, pe->phb->global_number, pe->addr, rc); + pr_warn("%s: Failure %d unfreezing PHB#%x-PE#%x\n", + __func__, rc, pe->phb->global_number, pe->addr); return (void *)pe; } return NULL; } -static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) +static int eeh_clear_pe_frozen_state(struct eeh_pe *pe, + bool clear_sw_state) { void *rc; - rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL); + rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state); if (!rc) eeh_pe_state_clear(pe, EEH_PE_ISOLATED); return rc ? -EIO : 0; } +int eeh_pe_reset_and_recover(struct eeh_pe *pe) +{ + int result, ret; + + /* Bail if the PE is being recovered */ + if (pe->state & EEH_PE_RECOVERING) + return 0; + + /* Put the PE into recovery mode */ + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + + /* Save states */ + eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL); + + /* Report error */ + eeh_pe_dev_traverse(pe, eeh_report_error, &result); + + /* Issue reset */ + eeh_pe_state_mark(pe, EEH_PE_RESET); + ret = eeh_reset_pe(pe); + if (ret) { + eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_RESET); + return ret; + } + eeh_pe_state_clear(pe, EEH_PE_RESET); + + /* Unfreeze the PE */ + ret = eeh_clear_pe_frozen_state(pe, true); + if (ret) { + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + return ret; + } + + /* Notify completion of reset */ + eeh_pe_dev_traverse(pe, eeh_report_reset, &result); + + /* Restore device state */ + eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL); + + /* Resume */ + eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); + + /* Clear recovery mode */ + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + + return 0; +} + /** * eeh_reset_device - Perform actual reset of a pci slot * @pe: EEH PE @@ -540,7 +616,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) eeh_pe_state_clear(pe, EEH_PE_RESET); /* Clear frozen state */ - rc = eeh_clear_pe_frozen_state(pe); + rc = eeh_clear_pe_frozen_state(pe, false); if (rc) return rc; diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 00e3844525a6..53dd0915e690 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -428,7 +428,7 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev) } /* Remove the EEH device */ - pe = edev->pe; + pe = eeh_dev_to_pe(edev); edev->pe = NULL; list_del(&edev->list); @@ -584,6 +584,8 @@ static void *__eeh_pe_state_clear(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; int state = *((int *)flag); + struct eeh_dev *edev, *tmp; + struct pci_dev *pdev; /* Keep the state of permanently removed PE intact */ if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && @@ -592,9 +594,22 @@ static void *__eeh_pe_state_clear(void *data, void *flag) pe->state &= ~state; - /* Clear check count since last isolation */ - if (state & EEH_PE_ISOLATED) - pe->check_count = 0; + /* + * Special treatment on clearing isolated state. Clear + * check count since last isolation and put all affected + * devices to normal state. + */ + if (!(state & EEH_PE_ISOLATED)) + return NULL; + + pe->check_count = 0; + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + continue; + + pdev->error_state = pci_channel_io_normal; + } return NULL; } diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index e2595ba4b720..f19b1e5cb060 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -54,6 +54,43 @@ EEH_SHOW_ATTR(eeh_mode, mode, "0x%x"); EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x"); EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x"); +static ssize_t eeh_pe_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + int state; + + if (!edev || !edev->pe) + return -ENODEV; + + state = eeh_ops->get_state(edev->pe, NULL); + return sprintf(buf, "%0x08x %0x08x\n", + state, edev->pe->state); +} + +static ssize_t eeh_pe_state_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + + if (!edev || !edev->pe) + return -ENODEV; + + /* Nothing to do if it's not frozen */ + if (!(edev->pe->state & EEH_PE_ISOLATED)) + return count; + + if (eeh_unfreeze_pe(edev->pe, true)) + return -EIO; + + return count; +} + +static DEVICE_ATTR_RW(eeh_pe_state); + void eeh_sysfs_add_device(struct pci_dev *pdev) { struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); @@ -68,9 +105,10 @@ void eeh_sysfs_add_device(struct pci_dev *pdev) rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); + rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state); if (rc) - printk(KERN_WARNING "EEH: Unable to create sysfs entries\n"); + pr_warn("EEH: Unable to create sysfs entries\n"); else if (edev) edev->mode |= EEH_DEV_SYSFS; } @@ -92,6 +130,7 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev) device_remove_file(&pdev->dev, &dev_attr_eeh_mode); device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); + device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state); if (edev) edev->mode &= ~EEH_DEV_SYSFS; diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 7ee876d2adb5..fafff8dbd5d9 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -104,12 +104,15 @@ turn_on_mmu: * task's thread_struct. */ #define EXCEPTION_PROLOG \ - mtspr SPRN_SPRG_SCRATCH0,r10; \ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mfcr r10; \ + EXCEPTION_PROLOG_0; \ EXCEPTION_PROLOG_1; \ EXCEPTION_PROLOG_2 +#define EXCEPTION_PROLOG_0 \ + mtspr SPRN_SPRG_SCRATCH0,r10; \ + mtspr SPRN_SPRG_SCRATCH1,r11; \ + mfcr r10 + #define EXCEPTION_PROLOG_1 \ mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ andi. r11,r11,MSR_PR; \ @@ -145,6 +148,14 @@ turn_on_mmu: SAVE_2GPRS(7, r11) /* + * Exception exit code. + */ +#define EXCEPTION_EPILOG_0 \ + mtcr r10; \ + mfspr r10,SPRN_SPRG_SCRATCH0; \ + mfspr r11,SPRN_SPRG_SCRATCH1 + +/* * Note: code which follows this uses cr0.eq (set if from kernel), * r11, r12 (SRR0), and r9 (SRR1). * @@ -293,16 +304,8 @@ InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 stw r3, 8(r0) #endif - DO_8xx_CPU6(0x3f80, r3) - mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ - mfcr r10 -#ifdef CONFIG_8xx_CPU6 - stw r10, 0(r0) - stw r11, 4(r0) -#else - mtspr SPRN_DAR, r10 - mtspr SPRN_SPRG2, r11 -#endif + EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #ifdef CONFIG_8xx_CPU15 addi r11, r10, 0x1000 @@ -359,18 +362,11 @@ InstructionTLBMiss: mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ /* Restore registers */ -#ifndef CONFIG_8xx_CPU6 - mfspr r10, SPRN_DAR - mtcr r10 - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG2 -#else - lwz r11, 0(r0) - mtcr r11 - lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif - mfspr r10, SPRN_M_TW + mfspr r10, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 rfi 2: mfspr r11, SPRN_SRR1 @@ -381,19 +377,11 @@ InstructionTLBMiss: mtspr SPRN_SRR1, r11 /* Restore registers */ -#ifndef CONFIG_8xx_CPU6 - mfspr r10, SPRN_DAR - mtcr r10 - li r11, 0x00f0 - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG2 -#else - lwz r11, 0(r0) - mtcr r11 - lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif - mfspr r10, SPRN_M_TW + mfspr r10, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 b InstructionAccess . = 0x1200 @@ -401,16 +389,8 @@ DataStoreTLBMiss: #ifdef CONFIG_8xx_CPU6 stw r3, 8(r0) #endif - DO_8xx_CPU6(0x3f80, r3) - mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ - mfcr r10 -#ifdef CONFIG_8xx_CPU6 - stw r10, 0(r0) - stw r11, 4(r0) -#else - mtspr SPRN_DAR, r10 - mtspr SPRN_SPRG2, r11 -#endif + EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ /* If we are faulting a kernel address, we have to use the @@ -483,19 +463,12 @@ DataStoreTLBMiss: mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ /* Restore registers */ -#ifndef CONFIG_8xx_CPU6 - mfspr r10, SPRN_DAR - mtcr r10 - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG2 -#else - mtspr SPRN_DAR, r11 /* Tag DAR */ - lwz r11, 0(r0) - mtcr r11 - lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif - mfspr r10, SPRN_M_TW + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r10, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 rfi /* This is an instruction TLB error on the MPC8xx. This could be due @@ -507,35 +480,18 @@ InstructionTLBError: b InstructionAccess /* This is the data TLB error on the MPC8xx. This could be due to - * many reasons, including a dirty update to a pte. We can catch that - * one here, but anything else is an error. First, we track down the - * Linux pte. If it is valid, write access is allowed, but the - * page dirty bit is not set, we will set it and reload the TLB. For - * any other case, we bail out to a higher level function that can - * handle it. + * many reasons, including a dirty update to a pte. We bail out to + * a higher level function that can handle it. */ . = 0x1400 DataTLBError: -#ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) -#endif - DO_8xx_CPU6(0x3f80, r3) - mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ - mfcr r10 - stw r10, 0(r0) - stw r11, 4(r0) + EXCEPTION_PROLOG_0 - mfspr r10, SPRN_DAR - cmpwi cr0, r10, 0x00f0 + mfspr r11, SPRN_DAR + cmpwi cr0, r11, 0x00f0 beq- FixupDAR /* must be a buggy dcbX, icbi insn. */ -DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR */ - mfspr r10, SPRN_M_TW /* Restore registers */ - lwz r11, 0(r0) - mtcr r11 - lwz r11, 4(r0) -#ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) -#endif +DARFixed:/* Return from dcbx instruction bug workaround */ + EXCEPTION_EPILOG_0 b DataAccess EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) @@ -559,11 +515,15 @@ DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions * by decoding the registers used by the dcbx instruction and adding them. - * DAR is set to the calculated address and r10 also holds the EA on exit. + * DAR is set to the calculated address. */ /* define if you don't want to use self modifying code */ #define NO_SELF_MODIFYING_CODE FixupDAR:/* Entry point for dcbx workaround. */ +#ifdef CONFIG_8xx_CPU6 + stw r3, 8(r0) +#endif + mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ @@ -579,16 +539,17 @@ FixupDAR:/* Entry point for dcbx workaround. */ mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ lwz r11, 0(r11) /* Get the pte */ +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) /* restore r3 from memory */ +#endif /* concat physical page address(r11) and page offset(r10) */ rlwimi r11, r10, 0, 20, 31 lwz r11,0(r11) /* Check if it really is a dcbx instruction. */ /* dcbt and dcbtst does not generate DTLB Misses/Errors, * no need to include them here */ - srwi r10, r11, 26 /* check if major OP code is 31 */ - cmpwi cr0, r10, 31 - bne- 141f - rlwinm r10, r11, 0, 21, 30 + xoris r10, r11, 0x7c00 /* check if major OP code is 31 */ + rlwinm r10, r10, 0, 21, 5 cmpwi cr0, r10, 2028 /* Is dcbz? */ beq+ 142f cmpwi cr0, r10, 940 /* Is dcbi? */ @@ -599,16 +560,13 @@ FixupDAR:/* Entry point for dcbx workaround. */ beq+ 142f cmpwi cr0, r10, 1964 /* Is icbi? */ beq+ 142f -141: mfspr r10, SPRN_DAR /* r10 must hold DAR at exit */ +141: mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed /* Nope, go back to normal TLB processing */ 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 142: /* continue, it was a dcbx, dcbi instruction. */ -#ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) /* restore r3 from memory */ -#endif #ifndef NO_SELF_MODIFYING_CODE andis. r10,r11,0x1f /* test if reg RA is r0 */ li r10,modified_instr@l @@ -619,14 +577,15 @@ FixupDAR:/* Entry point for dcbx workaround. */ stw r11,0(r10) /* store add/and instruction */ dcbf 0,r10 /* flush new instr. to memory. */ icbi 0,r10 /* invalidate instr. cache line */ - lwz r11, 4(r0) /* restore r11 from memory */ - mfspr r10, SPRN_M_TW /* restore r10 from M_TW */ + mfspr r11, SPRN_SPRG_SCRATCH1 /* restore r11 */ + mfspr r10, SPRN_SPRG_SCRATCH0 /* restore r10 */ isync /* Wait until new instr is loaded from memory */ modified_instr: .space 4 /* this is where the add instr. is stored */ bne+ 143f subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */ 143: mtdar r10 /* store faulting EA in DAR */ + mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed /* Go back to normal TLB handling */ #else mfctr r10 @@ -680,13 +639,16 @@ modified_instr: mfdar r11 mtctr r11 /* restore ctr reg from DAR */ mtdar r10 /* save fault EA to DAR */ + mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed /* Go back to normal TLB handling */ /* special handling for r10,r11 since these are modified already */ -153: lwz r11, 4(r0) /* load r11 from memory */ - b 155f -154: mfspr r11, SPRN_M_TW /* load r10 from M_TW */ -155: add r10, r10, r11 /* add it */ +153: mfspr r11, SPRN_SPRG_SCRATCH1 /* load r11 from SPRN_SPRG_SCRATCH1 */ + add r10, r10, r11 /* add it */ + mfctr r11 /* restore r11 */ + b 151b +154: mfspr r11, SPRN_SPRG_SCRATCH0 /* load r10 from SPRN_SPRG_SCRATCH0 */ + add r10, r10, r11 /* add it */ mfctr r11 /* restore r11 */ b 151b #endif diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 0bb5918faaaf..1f7d84e2e8b2 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -293,7 +293,7 @@ out: /* * Handle single-step exceptions following a DABR hit. */ -int __kprobes single_step_dabr_instruction(struct die_args *args) +static int __kprobes single_step_dabr_instruction(struct die_args *args) { struct pt_regs *regs = args->regs; struct perf_event *bp = NULL; diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 1114d13ac19f..ac86c53e2542 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -55,7 +55,7 @@ static struct device ibmebus_bus_device = { /* fake "parent" device */ struct bus_type ibmebus_bus_type; /* These devices will automatically be added to the bus during init */ -static struct of_device_id __initdata ibmebus_matches[] = { +static const struct of_device_id ibmebus_matches[] __initconst = { { .compatible = "IBM,lhca" }, { .compatible = "IBM,lhea" }, {}, diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index be05841396cf..c0754bbf8118 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -73,7 +73,7 @@ _GLOBAL(power7_powersave_common) /* Check if something happened while soft-disabled */ lbz r0,PACAIRQHAPPENED(r13) - cmpwi cr0,r0,0 + andi. r0,r0,~PACA_IRQ_HARD_DIS@l beq 1f cmpwi cr0,r4,0 beq 1f diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4c5891de162e..8eb857f216c1 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -444,13 +444,13 @@ void migrate_irqs(void) cpumask_and(mask, data->affinity, map); if (cpumask_any(mask) >= nr_cpu_ids) { - printk("Breaking affinity for irq %i\n", irq); + pr_warn("Breaking affinity for irq %i\n", irq); cpumask_copy(mask, map); } if (chip->irq_set_affinity) chip->irq_set_affinity(data, mask, true); else if (desc->action && !(warned++)) - printk("Cannot set affinity for irq %i\n", irq); + pr_err("Cannot set affinity for irq %i\n", irq); } free_cpumask_var(mask); @@ -470,7 +470,7 @@ static inline void check_stack_overflow(void) /* check for stack overflow: is there less than 2KB free? */ if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { - printk("do_IRQ: stack overflow: %ld\n", + pr_err("do_IRQ: stack overflow: %ld\n", sp - sizeof(struct thread_info)); dump_stack(); } diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 936258881c98..7b750c4ed5c7 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -35,7 +35,7 @@ static struct legacy_serial_info { phys_addr_t taddr; } legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS]; -static struct of_device_id legacy_serial_parents[] __initdata = { +static const struct of_device_id legacy_serial_parents[] __initconst = { {.type = "soc",}, {.type = "tsi-bridge",}, {.type = "opb", }, diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 6cff040bf456..c94d2e018d84 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -15,6 +15,9 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/moduleloader.h> #include <linux/elf.h> @@ -28,12 +31,6 @@ #include <linux/sort.h> #include <asm/setup.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt , ...) -#endif - /* Count how many different relocations (different symbol, different addend) */ static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num) @@ -121,8 +118,8 @@ static unsigned long get_plt_size(const Elf32_Ehdr *hdr, continue; if (sechdrs[i].sh_type == SHT_RELA) { - DEBUGP("Found relocations in section %u\n", i); - DEBUGP("Ptr: %p. Number: %u\n", + pr_debug("Found relocations in section %u\n", i); + pr_debug("Ptr: %p. Number: %u\n", (void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size / sizeof(Elf32_Rela)); @@ -161,7 +158,7 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr, me->arch.core_plt_section = i; } if (!me->arch.core_plt_section || !me->arch.init_plt_section) { - printk("Module doesn't contain .plt or .init.plt sections.\n"); + pr_err("Module doesn't contain .plt or .init.plt sections.\n"); return -ENOEXEC; } @@ -189,7 +186,7 @@ static uint32_t do_plt_call(void *location, { struct ppc_plt_entry *entry; - DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); + pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); /* Init, or core PLT? */ if (location >= mod->module_core && location < mod->module_core + mod->core_size) @@ -208,7 +205,7 @@ static uint32_t do_plt_call(void *location, entry->jump[2] = 0x7d8903a6; /* mtctr r12 */ entry->jump[3] = 0x4e800420; /* bctr */ - DEBUGP("Initialized plt for 0x%x at %p\n", val, entry); + pr_debug("Initialized plt for 0x%x at %p\n", val, entry); return (uint32_t)entry; } @@ -224,7 +221,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, uint32_t *location; uint32_t value; - DEBUGP("Applying ADD relocate section %u to %u\n", relsec, + pr_debug("Applying ADD relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) { /* This is where to make the change */ @@ -268,17 +265,17 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, sechdrs, module); /* Only replace bits 2 through 26 */ - DEBUGP("REL24 value = %08X. location = %08X\n", + pr_debug("REL24 value = %08X. location = %08X\n", value, (uint32_t)location); - DEBUGP("Location before: %08X.\n", + pr_debug("Location before: %08X.\n", *(uint32_t *)location); *(uint32_t *)location = (*(uint32_t *)location & ~0x03fffffc) | ((value - (uint32_t)location) & 0x03fffffc); - DEBUGP("Location after: %08X.\n", + pr_debug("Location after: %08X.\n", *(uint32_t *)location); - DEBUGP("ie. jump to %08X+%08X = %08X\n", + pr_debug("ie. jump to %08X+%08X = %08X\n", *(uint32_t *)location & 0x03fffffc, (uint32_t)location, (*(uint32_t *)location & 0x03fffffc) @@ -291,7 +288,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, break; default: - printk("%s: unknown ADD relocation: %u\n", + pr_err("%s: unknown ADD relocation: %u\n", module->name, ELF32_R_TYPE(rela[i].r_info)); return -ENOEXEC; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index d807ee626af9..68384514506b 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -15,6 +15,9 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/elf.h> #include <linux/moduleloader.h> @@ -36,11 +39,6 @@ Using a magic allocator which places modules within 32MB solves this, and makes other things simpler. Anton? --RR. */ -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt , ...) -#endif #if defined(_CALL_ELF) && _CALL_ELF == 2 #define R2_STACK_OFFSET 24 @@ -279,8 +277,8 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, /* Every relocated section... */ for (i = 1; i < hdr->e_shnum; i++) { if (sechdrs[i].sh_type == SHT_RELA) { - DEBUGP("Found relocations in section %u\n", i); - DEBUGP("Ptr: %p. Number: %lu\n", + pr_debug("Found relocations in section %u\n", i); + pr_debug("Ptr: %p. Number: %Lu\n", (void *)sechdrs[i].sh_addr, sechdrs[i].sh_size / sizeof(Elf64_Rela)); @@ -304,7 +302,7 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, relocs++; #endif - DEBUGP("Looks like a total of %lu stubs, max\n", relocs); + pr_debug("Looks like a total of %lu stubs, max\n", relocs); return relocs * sizeof(struct ppc64_stub_entry); } @@ -390,7 +388,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, } if (!me->arch.stubs_section) { - printk("%s: doesn't contain .stubs.\n", me->name); + pr_err("%s: doesn't contain .stubs.\n", me->name); return -ENOEXEC; } @@ -434,11 +432,11 @@ static inline int create_stub(Elf64_Shdr *sechdrs, /* Stub uses address relative to r2. */ reladdr = (unsigned long)entry - my_r2(sechdrs, me); if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { - printk("%s: Address %p of stub out of range of %p.\n", + pr_err("%s: Address %p of stub out of range of %p.\n", me->name, (void *)reladdr, (void *)my_r2); return 0; } - DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr); + pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); entry->jump[0] |= PPC_HA(reladdr); entry->jump[1] |= PPC_LO(reladdr); @@ -477,7 +475,7 @@ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs, static int restore_r2(u32 *instruction, struct module *me) { if (*instruction != PPC_INST_NOP) { - printk("%s: Expect noop after relocate, got %08x\n", + pr_err("%s: Expect noop after relocate, got %08x\n", me->name, *instruction); return 0; } @@ -498,7 +496,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, unsigned long *location; unsigned long value; - DEBUGP("Applying ADD relocate section %u to %u\n", relsec, + pr_debug("Applying ADD relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); /* First time we're called, we can fix up .TOC. */ @@ -519,7 +517,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + ELF64_R_SYM(rela[i].r_info); - DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n", + pr_debug("RELOC at %p: %li-type as %s (0x%lx) + %li\n", location, (long)ELF64_R_TYPE(rela[i].r_info), strtab + sym->st_name, (unsigned long)sym->st_value, (long)rela[i].r_addend); @@ -546,7 +544,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Subtract TOC pointer */ value -= my_r2(sechdrs, me); if (value + 0x8000 > 0xffff) { - printk("%s: bad TOC16 relocation (%lu)\n", + pr_err("%s: bad TOC16 relocation (0x%lx)\n", me->name, value); return -ENOEXEC; } @@ -567,7 +565,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Subtract TOC pointer */ value -= my_r2(sechdrs, me); if ((value & 3) != 0 || value + 0x8000 > 0xffff) { - printk("%s: bad TOC16_DS relocation (%lu)\n", + pr_err("%s: bad TOC16_DS relocation (0x%lx)\n", me->name, value); return -ENOEXEC; } @@ -580,7 +578,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Subtract TOC pointer */ value -= my_r2(sechdrs, me); if ((value & 3) != 0) { - printk("%s: bad TOC16_LO_DS relocation (%lu)\n", + pr_err("%s: bad TOC16_LO_DS relocation (0x%lx)\n", me->name, value); return -ENOEXEC; } @@ -613,7 +611,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Convert value to relative */ value -= (unsigned long)location; if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){ - printk("%s: REL24 %li out of range!\n", + pr_err("%s: REL24 %li out of range!\n", me->name, (long int)value); return -ENOEXEC; } @@ -655,7 +653,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, break; default: - printk("%s: Unknown ADD relocation: %lu\n", + pr_err("%s: Unknown ADD relocation: %lu\n", me->name, (unsigned long)ELF64_R_TYPE(rela[i].r_info)); return -ENOEXEC; diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 28b898e68185..34f7c9b7cd96 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -567,7 +567,7 @@ static int __init nvram_init(void) return rc; } -void __exit nvram_cleanup(void) +static void __exit nvram_cleanup(void) { misc_deregister( &nvram_dev ); } diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index a7b743076720..f87bc1b4bdda 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -97,7 +97,7 @@ static int of_pci_phb_probe(struct platform_device *dev) return 0; } -static struct of_device_id of_pci_phb_ids[] = { +static const struct of_device_id of_pci_phb_ids[] = { { .type = "pci", }, { .type = "pcix", }, { .type = "pcie", }, diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index b2814e23e1ed..bd70a51d5747 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1140,7 +1140,7 @@ static int reparent_resources(struct resource *parent, * as well. */ -void pcibios_allocate_bus_resources(struct pci_bus *bus) +static void pcibios_allocate_bus_resources(struct pci_bus *bus) { struct pci_bus *b; int i; @@ -1561,7 +1561,6 @@ EARLY_PCI_OP(write, byte, u8) EARLY_PCI_OP(write, word, u16) EARLY_PCI_OP(write, dword, u32) -extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, int cap); int early_find_capability(struct pci_controller *hose, int bus, int devfn, int cap) { diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 44562aa97f16..e6245e9c7d8d 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -38,7 +38,7 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def) * @addr0: value of 1st cell of a device tree PCI address. * @bridge: Set this flag if the address is from a bridge 'ranges' property */ -unsigned int pci_parse_of_flags(u32 addr0, int bridge) +static unsigned int pci_parse_of_flags(u32 addr0, int bridge) { unsigned int flags = 0; diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 48d17d6fca5b..c4dfff6c2719 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -1,207 +1,41 @@ -#include <linux/export.h> -#include <linux/threads.h> -#include <linux/smp.h> -#include <linux/sched.h> -#include <linux/elfcore.h> -#include <linux/string.h> -#include <linux/interrupt.h> -#include <linux/screen_info.h> -#include <linux/vt_kern.h> -#include <linux/nvram.h> -#include <linux/irq.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/bitops.h> +#include <linux/ftrace.h> +#include <linux/mm.h> -#include <asm/page.h> #include <asm/processor.h> -#include <asm/cacheflush.h> -#include <asm/uaccess.h> -#include <asm/io.h> -#include <linux/atomic.h> -#include <asm/checksum.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> -#include <linux/adb.h> -#include <linux/cuda.h> -#include <linux/pmu.h> -#include <asm/prom.h> -#include <asm/pci-bridge.h> -#include <asm/irq.h> -#include <asm/pmac_feature.h> -#include <asm/dma.h> -#include <asm/machdep.h> -#include <asm/hw_irq.h> -#include <asm/nvram.h> -#include <asm/mmu_context.h> -#include <asm/backlight.h> -#include <asm/time.h> -#include <asm/cputable.h> -#include <asm/btext.h> -#include <asm/div64.h> -#include <asm/signal.h> -#include <asm/dcr.h> -#include <asm/ftrace.h> #include <asm/switch_to.h> +#include <asm/cacheflush.h> #include <asm/epapr_hcalls.h> -#ifdef CONFIG_PPC32 -extern void transfer_to_handler(void); -extern void do_IRQ(struct pt_regs *regs); -extern void machine_check_exception(struct pt_regs *regs); -extern void alignment_exception(struct pt_regs *regs); -extern void program_check_exception(struct pt_regs *regs); -extern void single_step_exception(struct pt_regs *regs); -extern int sys_sigreturn(struct pt_regs *regs); +EXPORT_SYMBOL(flush_dcache_range); +EXPORT_SYMBOL(flush_icache_range); -EXPORT_SYMBOL(clear_pages); -EXPORT_SYMBOL(ISA_DMA_THRESHOLD); -EXPORT_SYMBOL(DMA_MODE_READ); -EXPORT_SYMBOL(DMA_MODE_WRITE); +EXPORT_SYMBOL(empty_zero_page); -EXPORT_SYMBOL(transfer_to_handler); -EXPORT_SYMBOL(do_IRQ); -EXPORT_SYMBOL(machine_check_exception); -EXPORT_SYMBOL(alignment_exception); -EXPORT_SYMBOL(program_check_exception); -EXPORT_SYMBOL(single_step_exception); -EXPORT_SYMBOL(sys_sigreturn); -#endif +long long __bswapdi2(long long); +EXPORT_SYMBOL(__bswapdi2); #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); #endif -EXPORT_SYMBOL(strcpy); -EXPORT_SYMBOL(strncpy); -EXPORT_SYMBOL(strcat); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strcmp); -EXPORT_SYMBOL(strncmp); - -#ifndef CONFIG_GENERIC_CSUM -EXPORT_SYMBOL(csum_partial); -EXPORT_SYMBOL(csum_partial_copy_generic); -EXPORT_SYMBOL(ip_fast_csum); -EXPORT_SYMBOL(csum_tcpudp_magic); -#endif - -EXPORT_SYMBOL(__copy_tofrom_user); -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(copy_page); - -#if defined(CONFIG_PCI) && defined(CONFIG_PPC32) -EXPORT_SYMBOL(isa_io_base); -EXPORT_SYMBOL(isa_mem_base); -EXPORT_SYMBOL(pci_dram_offset); -#endif /* CONFIG_PCI */ - -EXPORT_SYMBOL(start_thread); - #ifdef CONFIG_PPC_FPU EXPORT_SYMBOL(giveup_fpu); EXPORT_SYMBOL(load_fp_state); EXPORT_SYMBOL(store_fp_state); #endif + #ifdef CONFIG_ALTIVEC EXPORT_SYMBOL(giveup_altivec); EXPORT_SYMBOL(load_vr_state); EXPORT_SYMBOL(store_vr_state); -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_VSX -EXPORT_SYMBOL(giveup_vsx); -EXPORT_SYMBOL_GPL(__giveup_vsx); -#endif /* CONFIG_VSX */ -#ifdef CONFIG_SPE -EXPORT_SYMBOL(giveup_spe); -#endif /* CONFIG_SPE */ - -#ifndef CONFIG_PPC64 -EXPORT_SYMBOL(flush_instruction_cache); #endif -EXPORT_SYMBOL(flush_dcache_range); -EXPORT_SYMBOL(flush_icache_range); -#ifdef CONFIG_SMP -#ifdef CONFIG_PPC32 -EXPORT_SYMBOL(smp_hw_index); -#endif -#endif - -#ifdef CONFIG_ADB -EXPORT_SYMBOL(adb_request); -EXPORT_SYMBOL(adb_register); -EXPORT_SYMBOL(adb_unregister); -EXPORT_SYMBOL(adb_poll); -EXPORT_SYMBOL(adb_try_handler_change); -#endif /* CONFIG_ADB */ -#ifdef CONFIG_ADB_CUDA -EXPORT_SYMBOL(cuda_request); -EXPORT_SYMBOL(cuda_poll); -#endif /* CONFIG_ADB_CUDA */ -EXPORT_SYMBOL(to_tm); - -#ifdef CONFIG_PPC32 -long long __ashrdi3(long long, int); -long long __ashldi3(long long, int); -long long __lshrdi3(long long, int); -EXPORT_SYMBOL(__ashrdi3); -EXPORT_SYMBOL(__ashldi3); -EXPORT_SYMBOL(__lshrdi3); -int __ucmpdi2(unsigned long long, unsigned long long); -EXPORT_SYMBOL(__ucmpdi2); -int __cmpdi2(long long, long long); -EXPORT_SYMBOL(__cmpdi2); -#endif -long long __bswapdi2(long long); -EXPORT_SYMBOL(__bswapdi2); -EXPORT_SYMBOL(memcpy); -EXPORT_SYMBOL(memset); -EXPORT_SYMBOL(memmove); -EXPORT_SYMBOL(memcmp); -EXPORT_SYMBOL(memchr); - -#if defined(CONFIG_FB_VGA16_MODULE) -EXPORT_SYMBOL(screen_info); -#endif - -#ifdef CONFIG_PPC32 -EXPORT_SYMBOL(timer_interrupt); -EXPORT_SYMBOL(tb_ticks_per_jiffy); -EXPORT_SYMBOL(cacheable_memcpy); -EXPORT_SYMBOL(cacheable_memzero); -#endif - -#ifdef CONFIG_PPC32 -EXPORT_SYMBOL(switch_mmu_context); -#endif - -#ifdef CONFIG_PPC_STD_MMU_32 -extern long mol_trampoline; -EXPORT_SYMBOL(mol_trampoline); /* For MOL */ -EXPORT_SYMBOL(flush_hash_pages); /* For MOL */ -#ifdef CONFIG_SMP -extern int mmu_hash_lock; -EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ -#endif /* CONFIG_SMP */ -extern long *intercept_table; -EXPORT_SYMBOL(intercept_table); -#endif /* CONFIG_PPC_STD_MMU_32 */ -#ifdef CONFIG_PPC_DCR_NATIVE -EXPORT_SYMBOL(__mtdcr); -EXPORT_SYMBOL(__mfdcr); -#endif -EXPORT_SYMBOL(empty_zero_page); - -#ifdef CONFIG_PPC64 -EXPORT_SYMBOL(__arch_hweight8); -EXPORT_SYMBOL(__arch_hweight16); -EXPORT_SYMBOL(__arch_hweight32); -EXPORT_SYMBOL(__arch_hweight64); +#ifdef CONFIG_VSX +EXPORT_SYMBOL_GPL(__giveup_vsx); #endif -#ifdef CONFIG_PPC_BOOK3S_64 -EXPORT_SYMBOL_GPL(mmu_psize_defs); +#ifdef CONFIG_SPE +EXPORT_SYMBOL(giveup_spe); #endif #ifdef CONFIG_EPAPR_PARAVIRT diff --git a/arch/powerpc/kernel/ppc_ksyms_32.c b/arch/powerpc/kernel/ppc_ksyms_32.c new file mode 100644 index 000000000000..30ddd8a24eee --- /dev/null +++ b/arch/powerpc/kernel/ppc_ksyms_32.c @@ -0,0 +1,61 @@ +#include <linux/export.h> +#include <linux/smp.h> + +#include <asm/page.h> +#include <asm/dma.h> +#include <asm/io.h> +#include <asm/hw_irq.h> +#include <asm/time.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/dcr.h> + +EXPORT_SYMBOL(clear_pages); +EXPORT_SYMBOL(ISA_DMA_THRESHOLD); +EXPORT_SYMBOL(DMA_MODE_READ); +EXPORT_SYMBOL(DMA_MODE_WRITE); + +#if defined(CONFIG_PCI) +EXPORT_SYMBOL(isa_io_base); +EXPORT_SYMBOL(isa_mem_base); +EXPORT_SYMBOL(pci_dram_offset); +#endif + +#ifdef CONFIG_SMP +EXPORT_SYMBOL(smp_hw_index); +#endif + +long long __ashrdi3(long long, int); +long long __ashldi3(long long, int); +long long __lshrdi3(long long, int); +int __ucmpdi2(unsigned long long, unsigned long long); +int __cmpdi2(long long, long long); +EXPORT_SYMBOL(__ashrdi3); +EXPORT_SYMBOL(__ashldi3); +EXPORT_SYMBOL(__lshrdi3); +EXPORT_SYMBOL(__ucmpdi2); +EXPORT_SYMBOL(__cmpdi2); + +EXPORT_SYMBOL(timer_interrupt); +EXPORT_SYMBOL(tb_ticks_per_jiffy); + +EXPORT_SYMBOL(switch_mmu_context); + +#ifdef CONFIG_PPC_STD_MMU_32 +extern long mol_trampoline; +EXPORT_SYMBOL(mol_trampoline); /* For MOL */ +EXPORT_SYMBOL(flush_hash_pages); /* For MOL */ +#ifdef CONFIG_SMP +extern int mmu_hash_lock; +EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ +#endif /* CONFIG_SMP */ +extern long *intercept_table; +EXPORT_SYMBOL(intercept_table); +#endif /* CONFIG_PPC_STD_MMU_32 */ + +#ifdef CONFIG_PPC_DCR_NATIVE +EXPORT_SYMBOL(__mtdcr); +EXPORT_SYMBOL(__mfdcr); +#endif + +EXPORT_SYMBOL(flush_instruction_cache); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index bf44ae962ab8..aa1df89c8b2a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -228,6 +228,7 @@ void giveup_vsx(struct task_struct *tsk) giveup_altivec_maybe_transactional(tsk); __giveup_vsx(tsk); } +EXPORT_SYMBOL(giveup_vsx); void flush_vsx_to_thread(struct task_struct *tsk) { @@ -1316,6 +1317,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.tm_tfiar = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ } +EXPORT_SYMBOL(start_thread); #define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \ | PR_FP_EXC_RES | PR_FP_EXC_INV) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 4e139f8a69ef..099f27e6d1b0 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -386,8 +386,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, return 0; } -int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname, - int depth, void *data) +static int __init early_init_dt_scan_chosen_ppc(unsigned long node, + const char *uname, + int depth, void *data) { const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */ @@ -641,6 +642,10 @@ void __init early_init_devtree(void *params) DBG(" -> early_init_devtree(%p)\n", params); + /* Too early to BUG_ON(), do it by hand */ + if (!early_init_dt_verify(params)) + panic("BUG: Failed verifying flat device tree, bad version?"); + /* Setup flat device-tree pointer */ initial_boot_params = params; @@ -663,14 +668,12 @@ void __init early_init_devtree(void *params) * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... */ - of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line); + of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line); /* Scan memory nodes and rebuild MEMBLOCKs */ of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); - /* Save command line for /proc/cmdline and then parse parameters */ - strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); parse_early_param(); /* make sure we've parsed cmdline for mem= before this */ diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index fe8e54b9ef7d..12640f7e726b 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -50,24 +50,14 @@ do done # ignore register save/restore funcitons - if [ "${UNDEF:0:9}" = "_restgpr_" ]; then + case $UNDEF in + _restgpr_*|_restgpr0_*|_rest32gpr_*) OK=1 - fi - if [ "${UNDEF:0:10}" = "_restgpr0_" ]; then - OK=1 - fi - if [ "${UNDEF:0:11}" = "_rest32gpr_" ]; then - OK=1 - fi - if [ "${UNDEF:0:9}" = "_savegpr_" ]; then + ;; + _savegpr_*|_savegpr0_*|_save32gpr_*) OK=1 - fi - if [ "${UNDEF:0:10}" = "_savegpr0_" ]; then - OK=1 - fi - if [ "${UNDEF:0:11}" = "_save32gpr_" ]; then - OK=1 - fi + ;; + esac if [ $OK -eq 0 ]; then ERROR=1 diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 2e3d2bf536c5..cdb404ea3468 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -932,7 +932,7 @@ void ptrace_triggered(struct perf_event *bp, } #endif /* CONFIG_HAVE_HW_BREAKPOINT */ -int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, +static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) { #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index e736387fee6a..5a2c049c1c61 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -286,7 +286,7 @@ static void prrn_work_fn(struct work_struct *work) static DECLARE_WORK(prrn_work, prrn_work_fn); -void prrn_schedule_update(u32 scope) +static void prrn_schedule_update(u32 scope) { flush_work(&prrn_work); prrn_update_scope = scope; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 1b0e26013a62..1362cd62b3fa 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -81,8 +81,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid); unsigned long klimit = (unsigned long) _end; -char cmd_line[COMMAND_LINE_SIZE]; - /* * This still seems to be needed... -- paulus */ @@ -94,6 +92,9 @@ struct screen_info screen_info = { .orig_video_isVGA = 1, .orig_video_points = 16 }; +#if defined(CONFIG_FB_VGA16_MODULE) +EXPORT_SYMBOL(screen_info); +#endif /* Variables required to store legacy IO irq routing */ int of_i8042_kbd_irq; @@ -382,7 +383,7 @@ void __init check_for_initrd(void) initrd_start = initrd_end = 0; if (initrd_start) - printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end); + pr_info("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end); DBG(" <- check_for_initrd()\n"); #endif /* CONFIG_BLK_DEV_INITRD */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index ea4fda60e57b..07831ed0d9ef 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -268,7 +268,7 @@ static void __init exc_lvl_early_init(void) /* Warning, IO base is not yet inited */ void __init setup_arch(char **cmdline_p) { - *cmdline_p = cmd_line; + *cmdline_p = boot_command_line; /* so udelay does something sensible, assume <= 1000 bogomips */ loops_per_jiffy = 500000000 / HZ; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 75d62d63fe68..cd07d79ad21c 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -525,21 +525,31 @@ void __init setup_system(void) printk("Starting Linux PPC64 %s\n", init_utsname()->version); printk("-----------------------------------------------------\n"); - printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); - printk("physicalMemorySize = 0x%llx\n", memblock_phys_mem_size()); + printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); + printk("phys_mem_size = 0x%llx\n", memblock_phys_mem_size()); + if (ppc64_caches.dline_size != 0x80) - printk("ppc64_caches.dcache_line_size = 0x%x\n", - ppc64_caches.dline_size); + printk("dcache_line_size = 0x%x\n", ppc64_caches.dline_size); if (ppc64_caches.iline_size != 0x80) - printk("ppc64_caches.icache_line_size = 0x%x\n", - ppc64_caches.iline_size); + printk("icache_line_size = 0x%x\n", ppc64_caches.iline_size); + + printk("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); + printk(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE); + printk(" always = 0x%016lx\n", CPU_FTRS_ALWAYS); + printk("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features, + cur_cpu_spec->cpu_user_features2); + printk("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); + printk("firmware_features = 0x%016lx\n", powerpc_firmware_features); + #ifdef CONFIG_PPC_STD_MMU_64 if (htab_address) - printk("htab_address = 0x%p\n", htab_address); - printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); -#endif /* CONFIG_PPC_STD_MMU_64 */ + printk("htab_address = 0x%p\n", htab_address); + + printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); +#endif + if (PHYSICAL_START > 0) - printk("physical_start = 0x%llx\n", + printk("physical_start = 0x%llx\n", (unsigned long long)PHYSICAL_START); printk("-----------------------------------------------------\n"); @@ -657,7 +667,7 @@ void __init setup_arch(char **cmdline_p) { ppc64_boot_msg(0x12, "Setup Arch"); - *cmdline_p = cmd_line; + *cmdline_p = boot_command_line; /* * Set cache line size based on type of cpu as a default. diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a0738af4aba6..71e186d5f331 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -52,6 +52,7 @@ #endif #include <asm/vdso.h> #include <asm/debug.h> +#include <asm/kexec.h> #ifdef DEBUG #include <asm/udbg.h> @@ -379,8 +380,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* * numa_node_id() works after this. */ - set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]); - set_cpu_numa_mem(cpu, local_memory_node(numa_cpu_lookup_table[cpu])); + if (cpu_present(cpu)) { + set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]); + set_cpu_numa_mem(cpu, + local_memory_node(numa_cpu_lookup_table[cpu])); + } } cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); @@ -728,6 +732,9 @@ void start_secondary(void *unused) } traverse_core_siblings(cpu, true); + set_numa_node(numa_cpu_lookup_table[cpu]); + set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); + smp_wmb(); notify_cpu_starting(cpu); set_cpu_online(cpu, true); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 368ab374d33c..7505599c2593 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -479,7 +479,7 @@ void arch_irq_work_raise(void) #endif /* CONFIG_IRQ_WORK */ -void __timer_interrupt(void) +static void __timer_interrupt(void) { struct pt_regs *regs = get_irq_regs(); u64 *next_tb = &__get_cpu_var(decrementers_next_tb); @@ -643,7 +643,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val) return found; } -void start_cpu_decrementer(void) +static void start_cpu_decrementer(void) { #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) /* Clear any pending timer interrupts */ @@ -1024,6 +1024,7 @@ void to_tm(int tim, struct rtc_time * tm) */ GregorianDay(tm); } +EXPORT_SYMBOL(to_tm); /* * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 59fa2de9546d..9f342f134ae4 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -10,7 +10,7 @@ CFLAGS_REMOVE_code-patching.o = -pg CFLAGS_REMOVE_feature-fixups.o = -pg obj-y := string.o alloc.o \ - crtsavres.o + crtsavres.o ppc_ksyms.o obj-$(CONFIG_PPC32) += div64.o copy_32.o obj-$(CONFIG_HAS_IOMEM) += devres.o diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 7a8a7487cee8..7ce3870d7ddd 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -164,7 +164,7 @@ static long calc_offset(struct fixup_entry *entry, unsigned int *p) return (unsigned long)p - (unsigned long)entry; } -void test_basic_patching(void) +static void test_basic_patching(void) { extern unsigned int ftr_fixup_test1; extern unsigned int end_ftr_fixup_test1; diff --git a/arch/powerpc/lib/ppc_ksyms.c b/arch/powerpc/lib/ppc_ksyms.c new file mode 100644 index 000000000000..f993959647b5 --- /dev/null +++ b/arch/powerpc/lib/ppc_ksyms.c @@ -0,0 +1,39 @@ +#include <linux/string.h> +#include <linux/uaccess.h> +#include <linux/bitops.h> +#include <net/checksum.h> + +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memcmp); +EXPORT_SYMBOL(memchr); +#ifdef CONFIG_PPC32 +EXPORT_SYMBOL(cacheable_memcpy); +EXPORT_SYMBOL(cacheable_memzero); +#endif + +EXPORT_SYMBOL(strcpy); +EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strcat); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); + +#ifndef CONFIG_GENERIC_CSUM +EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(ip_fast_csum); +EXPORT_SYMBOL(csum_tcpudp_magic); +#endif + +EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(copy_page); + +#ifdef CONFIG_PPC64 +EXPORT_SYMBOL(__arch_hweight8); +EXPORT_SYMBOL(__arch_hweight16); +EXPORT_SYMBOL(__arch_hweight32); +EXPORT_SYMBOL(__arch_hweight64); +#endif diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 5c09f365c842..54651fc2d412 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -98,13 +98,8 @@ static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs ra = (instr >> 16) & 0x1f; ea = (signed short) instr; /* sign-extend */ - if (ra) { + if (ra) ea += regs->gpr[ra]; - if (instr & 0x04000000) { /* update forms */ - if ((instr>>26) != 47) /* stmw is not an update form */ - regs->gpr[ra] = ea; - } - } return truncate_if_32bit(regs->msr, ea); } @@ -120,11 +115,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg ra = (instr >> 16) & 0x1f; ea = (signed short) (instr & ~3); /* sign-extend */ - if (ra) { + if (ra) ea += regs->gpr[ra]; - if ((instr & 3) == 1) /* update forms */ - regs->gpr[ra] = ea; - } return truncate_if_32bit(regs->msr, ea); } @@ -133,8 +125,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg /* * Calculate effective address for an X-form instruction */ -static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs, - int do_update) +static unsigned long __kprobes xform_ea(unsigned int instr, + struct pt_regs *regs) { int ra, rb; unsigned long ea; @@ -142,11 +134,8 @@ static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs ra = (instr >> 16) & 0x1f; rb = (instr >> 11) & 0x1f; ea = regs->gpr[rb]; - if (ra) { + if (ra) ea += regs->gpr[ra]; - if (do_update) /* update forms */ - regs->gpr[ra] = ea; - } return truncate_if_32bit(regs->msr, ea); } @@ -611,6 +600,23 @@ static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1, regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift); } +static int __kprobes trap_compare(long v1, long v2) +{ + int ret = 0; + + if (v1 < v2) + ret |= 0x10; + else if (v1 > v2) + ret |= 0x08; + else + ret |= 0x04; + if ((unsigned long)v1 < (unsigned long)v2) + ret |= 0x02; + else if ((unsigned long)v1 > (unsigned long)v2) + ret |= 0x01; + return ret; +} + /* * Elements of 32-bit rotate and mask instructions. */ @@ -627,26 +633,27 @@ static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1, #define ROTATE(x, n) ((n) ? (((x) << (n)) | ((x) >> (8 * sizeof(long) - (n)))) : (x)) /* - * Emulate instructions that cause a transfer of control, - * loads and stores, and a few other instructions. - * Returns 1 if the step was emulated, 0 if not, - * or -1 if the instruction is one that should not be stepped, - * such as an rfid, or a mtmsrd that would clear MSR_RI. + * Decode an instruction, and execute it if that can be done just by + * modifying *regs (i.e. integer arithmetic and logical instructions, + * branches, and barrier instructions). + * Returns 1 if the instruction has been executed, or 0 if not. + * Sets *op to indicate what the instruction does. */ -int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) +int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, + unsigned int instr) { unsigned int opcode, ra, rb, rd, spr, u; unsigned long int imm; unsigned long int val, val2; - unsigned long int ea; - unsigned int cr, mb, me, sh; - int err; - unsigned long old_ra, val3; + unsigned int mb, me, sh; long ival; + op->type = COMPUTE; + opcode = instr >> 26; switch (opcode) { case 16: /* bc */ + op->type = BRANCH; imm = (signed short)(instr & 0xfffc); if ((instr & 2) == 0) imm += regs->nip; @@ -659,26 +666,14 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 1; #ifdef CONFIG_PPC64 case 17: /* sc */ - /* - * N.B. this uses knowledge about how the syscall - * entry code works. If that is changed, this will - * need to be changed also. - */ - if (regs->gpr[0] == 0x1ebe && - cpu_has_feature(CPU_FTR_REAL_LE)) { - regs->msr ^= MSR_LE; - goto instr_done; - } - regs->gpr[9] = regs->gpr[13]; - regs->gpr[10] = MSR_KERNEL; - regs->gpr[11] = regs->nip + 4; - regs->gpr[12] = regs->msr & MSR_MASK; - regs->gpr[13] = (unsigned long) get_paca(); - regs->nip = (unsigned long) &system_call_common; - regs->msr = MSR_KERNEL; - return 1; + if ((instr & 0xfe2) == 2) + op->type = SYSCALL; + else + op->type = UNKNOWN; + return 0; #endif case 18: /* b */ + op->type = BRANCH; imm = instr & 0x03fffffc; if (imm & 0x02000000) imm -= 0x04000000; @@ -691,8 +686,16 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 1; case 19: switch ((instr >> 1) & 0x3ff) { + case 0: /* mcrf */ + rd = (instr >> 21) & 0x1c; + ra = (instr >> 16) & 0x1c; + val = (regs->ccr >> ra) & 0xf; + regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd); + goto instr_done; + case 16: /* bclr */ case 528: /* bcctr */ + op->type = BRANCH; imm = (instr & 0x400)? regs->ctr: regs->link; regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); imm = truncate_if_32bit(regs->msr, imm); @@ -703,9 +706,13 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 1; case 18: /* rfid, scary */ - return -1; + if (regs->msr & MSR_PR) + goto priv; + op->type = RFI; + return 0; case 150: /* isync */ + op->type = BARRIER; isync(); goto instr_done; @@ -731,6 +738,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 31: switch ((instr >> 1) & 0x3ff) { case 598: /* sync */ + op->type = BARRIER; #ifdef __powerpc64__ switch ((instr >> 21) & 3) { case 1: /* lwsync */ @@ -745,6 +753,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case 854: /* eieio */ + op->type = BARRIER; eieio(); goto instr_done; } @@ -760,6 +769,17 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) rb = (instr >> 11) & 0x1f; switch (opcode) { +#ifdef __powerpc64__ + case 2: /* tdi */ + if (rd & trap_compare(regs->gpr[ra], (short) instr)) + goto trap; + goto instr_done; +#endif + case 3: /* twi */ + if (rd & trap_compare((int)regs->gpr[ra], (short) instr)) + goto trap; + goto instr_done; + case 7: /* mulli */ regs->gpr[rd] = regs->gpr[ra] * (short) instr; goto instr_done; @@ -908,35 +928,44 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 31: switch ((instr >> 1) & 0x3ff) { + case 4: /* tw */ + if (rd == 0x1f || + (rd & trap_compare((int)regs->gpr[ra], + (int)regs->gpr[rb]))) + goto trap; + goto instr_done; +#ifdef __powerpc64__ + case 68: /* td */ + if (rd & trap_compare(regs->gpr[ra], regs->gpr[rb])) + goto trap; + goto instr_done; +#endif case 83: /* mfmsr */ if (regs->msr & MSR_PR) - break; - regs->gpr[rd] = regs->msr & MSR_MASK; - goto instr_done; + goto priv; + op->type = MFMSR; + op->reg = rd; + return 0; case 146: /* mtmsr */ if (regs->msr & MSR_PR) - break; - imm = regs->gpr[rd]; - if ((imm & MSR_RI) == 0) - /* can't step mtmsr that would clear MSR_RI */ - return -1; - regs->msr = imm; - goto instr_done; + goto priv; + op->type = MTMSR; + op->reg = rd; + op->val = 0xffffffff & ~(MSR_ME | MSR_LE); + return 0; #ifdef CONFIG_PPC64 case 178: /* mtmsrd */ - /* only MSR_EE and MSR_RI get changed if bit 15 set */ - /* mtmsrd doesn't change MSR_HV and MSR_ME */ if (regs->msr & MSR_PR) - break; - imm = (instr & 0x10000)? 0x8002: 0xefffffffffffefffUL; - imm = (regs->msr & MSR_MASK & ~imm) - | (regs->gpr[rd] & imm); - if ((imm & MSR_RI) == 0) - /* can't step mtmsrd that would clear MSR_RI */ - return -1; - regs->msr = imm; - goto instr_done; + goto priv; + op->type = MTMSR; + op->reg = rd; + /* only MSR_EE and MSR_RI get changed if bit 15 set */ + /* mtmsrd doesn't change MSR_HV, MSR_ME or MSR_LE */ + imm = (instr & 0x10000)? 0x8002: 0xefffffffffffeffeUL; + op->val = imm; + return 0; #endif + case 19: /* mfcr */ regs->gpr[rd] = regs->ccr; regs->gpr[rd] &= 0xffffffffUL; @@ -954,33 +983,43 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case 339: /* mfspr */ - spr = (instr >> 11) & 0x3ff; + spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); switch (spr) { - case 0x20: /* mfxer */ + case SPRN_XER: /* mfxer */ regs->gpr[rd] = regs->xer; regs->gpr[rd] &= 0xffffffffUL; goto instr_done; - case 0x100: /* mflr */ + case SPRN_LR: /* mflr */ regs->gpr[rd] = regs->link; goto instr_done; - case 0x120: /* mfctr */ + case SPRN_CTR: /* mfctr */ regs->gpr[rd] = regs->ctr; goto instr_done; + default: + op->type = MFSPR; + op->reg = rd; + op->spr = spr; + return 0; } break; case 467: /* mtspr */ - spr = (instr >> 11) & 0x3ff; + spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); switch (spr) { - case 0x20: /* mtxer */ + case SPRN_XER: /* mtxer */ regs->xer = (regs->gpr[rd] & 0xffffffffUL); goto instr_done; - case 0x100: /* mtlr */ + case SPRN_LR: /* mtlr */ regs->link = regs->gpr[rd]; goto instr_done; - case 0x120: /* mtctr */ + case SPRN_CTR: /* mtctr */ regs->ctr = regs->gpr[rd]; goto instr_done; + default: + op->type = MTSPR; + op->val = regs->gpr[rd]; + op->spr = spr; + return 0; } break; @@ -1257,294 +1296,242 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) * Cache instructions */ case 54: /* dcbst */ - ea = xform_ea(instr, regs, 0); - if (!address_ok(regs, ea, 8)) - return 0; - err = 0; - __cacheop_user_asmx(ea, err, "dcbst"); - if (err) - return 0; - goto instr_done; + op->type = MKOP(CACHEOP, DCBST, 0); + op->ea = xform_ea(instr, regs); + return 0; case 86: /* dcbf */ - ea = xform_ea(instr, regs, 0); - if (!address_ok(regs, ea, 8)) - return 0; - err = 0; - __cacheop_user_asmx(ea, err, "dcbf"); - if (err) - return 0; - goto instr_done; + op->type = MKOP(CACHEOP, DCBF, 0); + op->ea = xform_ea(instr, regs); + return 0; case 246: /* dcbtst */ - if (rd == 0) { - ea = xform_ea(instr, regs, 0); - prefetchw((void *) ea); - } - goto instr_done; + op->type = MKOP(CACHEOP, DCBTST, 0); + op->ea = xform_ea(instr, regs); + op->reg = rd; + return 0; case 278: /* dcbt */ - if (rd == 0) { - ea = xform_ea(instr, regs, 0); - prefetch((void *) ea); - } - goto instr_done; + op->type = MKOP(CACHEOP, DCBTST, 0); + op->ea = xform_ea(instr, regs); + op->reg = rd; + return 0; + case 982: /* icbi */ + op->type = MKOP(CACHEOP, ICBI, 0); + op->ea = xform_ea(instr, regs); + return 0; } break; } /* - * Following cases are for loads and stores, so bail out - * if we're in little-endian mode. + * Loads and stores. */ - if (regs->msr & MSR_LE) - return 0; - - /* - * Save register RA in case it's an update form load or store - * and the access faults. - */ - old_ra = regs->gpr[ra]; + op->type = UNKNOWN; + op->update_reg = ra; + op->reg = rd; + op->val = regs->gpr[rd]; + u = (instr >> 20) & UPDATE; switch (opcode) { case 31: - u = instr & 0x40; + u = instr & UPDATE; + op->ea = xform_ea(instr, regs); switch ((instr >> 1) & 0x3ff) { case 20: /* lwarx */ - ea = xform_ea(instr, regs, 0); - if (ea & 3) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 4)) - goto ldst_done; - err = 0; - __get_user_asmx(val, ea, err, "lwarx"); - if (!err) - regs->gpr[rd] = val; - goto ldst_done; + op->type = MKOP(LARX, 0, 4); + break; case 150: /* stwcx. */ - ea = xform_ea(instr, regs, 0); - if (ea & 3) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 4)) - goto ldst_done; - err = 0; - __put_user_asmx(regs->gpr[rd], ea, err, "stwcx.", cr); - if (!err) - regs->ccr = (regs->ccr & 0x0fffffff) | - (cr & 0xe0000000) | - ((regs->xer >> 3) & 0x10000000); - goto ldst_done; + op->type = MKOP(STCX, 0, 4); + break; #ifdef __powerpc64__ case 84: /* ldarx */ - ea = xform_ea(instr, regs, 0); - if (ea & 7) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 8)) - goto ldst_done; - err = 0; - __get_user_asmx(val, ea, err, "ldarx"); - if (!err) - regs->gpr[rd] = val; - goto ldst_done; + op->type = MKOP(LARX, 0, 8); + break; case 214: /* stdcx. */ - ea = xform_ea(instr, regs, 0); - if (ea & 7) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 8)) - goto ldst_done; - err = 0; - __put_user_asmx(regs->gpr[rd], ea, err, "stdcx.", cr); - if (!err) - regs->ccr = (regs->ccr & 0x0fffffff) | - (cr & 0xe0000000) | - ((regs->xer >> 3) & 0x10000000); - goto ldst_done; + op->type = MKOP(STCX, 0, 8); + break; case 21: /* ldx */ case 53: /* ldux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 8); + break; #endif case 23: /* lwzx */ case 55: /* lwzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 4, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 4); + break; case 87: /* lbzx */ case 119: /* lbzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 1, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 1); + break; #ifdef CONFIG_ALTIVEC case 103: /* lvx */ case 359: /* lvxl */ if (!(regs->msr & MSR_VEC)) - break; - ea = xform_ea(instr, regs, 0); - err = do_vec_load(rd, do_lvx, ea, regs); - goto ldst_done; + goto vecunavail; + op->type = MKOP(LOAD_VMX, 0, 16); + break; case 231: /* stvx */ case 487: /* stvxl */ if (!(regs->msr & MSR_VEC)) - break; - ea = xform_ea(instr, regs, 0); - err = do_vec_store(rd, do_stvx, ea, regs); - goto ldst_done; + goto vecunavail; + op->type = MKOP(STORE_VMX, 0, 16); + break; #endif /* CONFIG_ALTIVEC */ #ifdef __powerpc64__ case 149: /* stdx */ case 181: /* stdux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 8); + break; #endif case 151: /* stwx */ case 183: /* stwux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 4, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 4); + break; case 215: /* stbx */ case 247: /* stbux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 1, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 1); + break; case 279: /* lhzx */ case 311: /* lhzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 2, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 2); + break; #ifdef __powerpc64__ case 341: /* lwax */ case 373: /* lwaux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 4, regs); - if (!err) - regs->gpr[rd] = (signed int) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 4); + break; #endif case 343: /* lhax */ case 375: /* lhaux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 2, regs); - if (!err) - regs->gpr[rd] = (signed short) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 2); + break; case 407: /* sthx */ case 439: /* sthux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 2); + break; #ifdef __powerpc64__ case 532: /* ldbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 8, regs); - if (!err) - regs->gpr[rd] = byterev_8(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 8); + break; #endif + case 533: /* lswx */ + op->type = MKOP(LOAD_MULTI, 0, regs->xer & 0x7f); + break; case 534: /* lwbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 4, regs); - if (!err) - regs->gpr[rd] = byterev_4(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 4); + break; + + case 597: /* lswi */ + if (rb == 0) + rb = 32; /* # bytes to load */ + op->type = MKOP(LOAD_MULTI, 0, rb); + op->ea = 0; + if (ra) + op->ea = truncate_if_32bit(regs->msr, + regs->gpr[ra]); + break; #ifdef CONFIG_PPC_FPU case 535: /* lfsx */ case 567: /* lfsux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_load(rd, do_lfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 4); + break; case 599: /* lfdx */ case 631: /* lfdux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_load(rd, do_lfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 8); + break; case 663: /* stfsx */ case 695: /* stfsux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_store(rd, do_stfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 4); + break; case 727: /* stfdx */ case 759: /* stfdux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_store(rd, do_stfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 8); + break; #endif #ifdef __powerpc64__ case 660: /* stdbrx */ - val = byterev_8(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 8); + op->val = byterev_8(regs->gpr[rd]); + break; #endif + case 661: /* stswx */ + op->type = MKOP(STORE_MULTI, 0, regs->xer & 0x7f); + break; + case 662: /* stwbrx */ - val = byterev_4(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 4, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 4); + op->val = byterev_4(regs->gpr[rd]); + break; + + case 725: + if (rb == 0) + rb = 32; /* # bytes to store */ + op->type = MKOP(STORE_MULTI, 0, rb); + op->ea = 0; + if (ra) + op->ea = truncate_if_32bit(regs->msr, + regs->gpr[ra]); + break; case 790: /* lhbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 2, regs); - if (!err) - regs->gpr[rd] = byterev_2(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 2); + break; case 918: /* sthbrx */ - val = byterev_2(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 2); + op->val = byterev_2(regs->gpr[rd]); + break; #ifdef CONFIG_VSX case 844: /* lxvd2x */ case 876: /* lxvd2ux */ if (!(regs->msr & MSR_VSX)) - break; - rd |= (instr & 1) << 5; - ea = xform_ea(instr, regs, u); - err = do_vsx_load(rd, do_lxvd2x, ea, regs); - goto ldst_done; + goto vsxunavail; + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, u, 16); + break; case 972: /* stxvd2x */ case 1004: /* stxvd2ux */ if (!(regs->msr & MSR_VSX)) - break; - rd |= (instr & 1) << 5; - ea = xform_ea(instr, regs, u); - err = do_vsx_store(rd, do_stxvd2x, ea, regs); - goto ldst_done; + goto vsxunavail; + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(STORE_VSX, u, 16); + break; #endif /* CONFIG_VSX */ } @@ -1552,178 +1539,123 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 32: /* lwz */ case 33: /* lwzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 4, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 4); + op->ea = dform_ea(instr, regs); + break; case 34: /* lbz */ case 35: /* lbzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 1, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 1); + op->ea = dform_ea(instr, regs); + break; case 36: /* stw */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 4, regs); - goto ldst_done; - case 37: /* stwu */ - val = regs->gpr[rd]; - val3 = dform_ea(instr, regs); - /* - * For PPC32 we always use stwu to change stack point with r1. So - * this emulated store may corrupt the exception frame, now we - * have to provide the exception frame trampoline, which is pushed - * below the kprobed function stack. So we only update gpr[1] but - * don't emulate the real store operation. We will do real store - * operation safely in exception return code by checking this flag. - */ - if ((ra == 1) && !(regs->msr & MSR_PR) \ - && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) { -#ifdef CONFIG_PPC32 - /* - * Check if we will touch kernel sack overflow - */ - if (val3 - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { - printk(KERN_CRIT "Can't kprobe this since Kernel stack overflow.\n"); - err = -EINVAL; - break; - } -#endif /* CONFIG_PPC32 */ - /* - * Check if we already set since that means we'll - * lose the previous value. - */ - WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); - set_thread_flag(TIF_EMULATE_STACK_STORE); - err = 0; - } else - err = write_mem(val, val3, 4, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 4); + op->ea = dform_ea(instr, regs); + break; case 38: /* stb */ case 39: /* stbu */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 1, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 1); + op->ea = dform_ea(instr, regs); + break; case 40: /* lhz */ case 41: /* lhzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 2, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 2); + op->ea = dform_ea(instr, regs); + break; case 42: /* lha */ case 43: /* lhau */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 2, regs); - if (!err) - regs->gpr[rd] = (signed short) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 2); + op->ea = dform_ea(instr, regs); + break; case 44: /* sth */ case 45: /* sthu */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 2); + op->ea = dform_ea(instr, regs); + break; case 46: /* lmw */ - ra = (instr >> 16) & 0x1f; if (ra >= rd) break; /* invalid form, ra in range to load */ - ea = dform_ea(instr, regs); - do { - err = read_mem(®s->gpr[rd], ea, 4, regs); - if (err) - return 0; - ea += 4; - } while (++rd < 32); - goto instr_done; + op->type = MKOP(LOAD_MULTI, 0, 4 * (32 - rd)); + op->ea = dform_ea(instr, regs); + break; case 47: /* stmw */ - ea = dform_ea(instr, regs); - do { - err = write_mem(regs->gpr[rd], ea, 4, regs); - if (err) - return 0; - ea += 4; - } while (++rd < 32); - goto instr_done; + op->type = MKOP(STORE_MULTI, 0, 4 * (32 - rd)); + op->ea = dform_ea(instr, regs); + break; #ifdef CONFIG_PPC_FPU case 48: /* lfs */ case 49: /* lfsu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_load(rd, do_lfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 4); + op->ea = dform_ea(instr, regs); + break; case 50: /* lfd */ case 51: /* lfdu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_load(rd, do_lfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 8); + op->ea = dform_ea(instr, regs); + break; case 52: /* stfs */ case 53: /* stfsu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_store(rd, do_stfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 4); + op->ea = dform_ea(instr, regs); + break; case 54: /* stfd */ case 55: /* stfdu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_store(rd, do_stfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 8); + op->ea = dform_ea(instr, regs); + break; #endif #ifdef __powerpc64__ case 58: /* ld[u], lwa */ + op->ea = dsform_ea(instr, regs); switch (instr & 3) { case 0: /* ld */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, 0, 8); + break; case 1: /* ldu */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, UPDATE, 8); + break; case 2: /* lwa */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 4, regs); - if (!err) - regs->gpr[rd] = (signed int) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT, 4); + break; } break; case 62: /* std[u] */ - val = regs->gpr[rd]; + op->ea = dsform_ea(instr, regs); switch (instr & 3) { case 0: /* std */ - err = write_mem(val, dsform_ea(instr, regs), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, 0, 8); + break; case 1: /* stdu */ - err = write_mem(val, dsform_ea(instr, regs), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, UPDATE, 8); + break; } break; #endif /* __powerpc64__ */ } - err = -EINVAL; - - ldst_done: - if (err) { - regs->gpr[ra] = old_ra; - return 0; /* invoke DSI if -EFAULT? */ - } - instr_done: - regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); - return 1; + return 0; logical_done: if (instr & 1) @@ -1733,5 +1665,349 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) arith_done: if (instr & 1) set_cr0(regs, rd); - goto instr_done; + + instr_done: + regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + return 1; + + priv: + op->type = INTERRUPT | 0x700; + op->val = SRR1_PROGPRIV; + return 0; + + trap: + op->type = INTERRUPT | 0x700; + op->val = SRR1_PROGTRAP; + return 0; + +#ifdef CONFIG_PPC_FPU + fpunavail: + op->type = INTERRUPT | 0x800; + return 0; +#endif + +#ifdef CONFIG_ALTIVEC + vecunavail: + op->type = INTERRUPT | 0xf20; + return 0; +#endif + +#ifdef CONFIG_VSX + vsxunavail: + op->type = INTERRUPT | 0xf40; + return 0; +#endif +} +EXPORT_SYMBOL_GPL(analyse_instr); + +/* + * For PPC32 we always use stwu with r1 to change the stack pointer. + * So this emulated store may corrupt the exception frame, now we + * have to provide the exception frame trampoline, which is pushed + * below the kprobed function stack. So we only update gpr[1] but + * don't emulate the real store operation. We will do real store + * operation safely in exception return code by checking this flag. + */ +static __kprobes int handle_stack_update(unsigned long ea, struct pt_regs *regs) +{ +#ifdef CONFIG_PPC32 + /* + * Check if we will touch kernel stack overflow + */ + if (ea - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { + printk(KERN_CRIT "Can't kprobe this since kernel stack would overflow.\n"); + return -EINVAL; + } +#endif /* CONFIG_PPC32 */ + /* + * Check if we already set since that means we'll + * lose the previous value. + */ + WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); + set_thread_flag(TIF_EMULATE_STACK_STORE); + return 0; +} + +static __kprobes void do_signext(unsigned long *valp, int size) +{ + switch (size) { + case 2: + *valp = (signed short) *valp; + break; + case 4: + *valp = (signed int) *valp; + break; + } +} + +static __kprobes void do_byterev(unsigned long *valp, int size) +{ + switch (size) { + case 2: + *valp = byterev_2(*valp); + break; + case 4: + *valp = byterev_4(*valp); + break; +#ifdef __powerpc64__ + case 8: + *valp = byterev_8(*valp); + break; +#endif + } +} + +/* + * Emulate instructions that cause a transfer of control, + * loads and stores, and a few other instructions. + * Returns 1 if the step was emulated, 0 if not, + * or -1 if the instruction is one that should not be stepped, + * such as an rfid, or a mtmsrd that would clear MSR_RI. + */ +int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) +{ + struct instruction_op op; + int r, err, size; + unsigned long val; + unsigned int cr; + int i, rd, nb; + + r = analyse_instr(&op, regs, instr); + if (r != 0) + return r; + + err = 0; + size = GETSIZE(op.type); + switch (op.type & INSTR_TYPE_MASK) { + case CACHEOP: + if (!address_ok(regs, op.ea, 8)) + return 0; + switch (op.type & CACHEOP_MASK) { + case DCBST: + __cacheop_user_asmx(op.ea, err, "dcbst"); + break; + case DCBF: + __cacheop_user_asmx(op.ea, err, "dcbf"); + break; + case DCBTST: + if (op.reg == 0) + prefetchw((void *) op.ea); + break; + case DCBT: + if (op.reg == 0) + prefetch((void *) op.ea); + break; + case ICBI: + __cacheop_user_asmx(op.ea, err, "icbi"); + break; + } + if (err) + return 0; + goto instr_done; + + case LARX: + if (regs->msr & MSR_LE) + return 0; + if (op.ea & (size - 1)) + break; /* can't handle misaligned */ + err = -EFAULT; + if (!address_ok(regs, op.ea, size)) + goto ldst_done; + err = 0; + switch (size) { + case 4: + __get_user_asmx(val, op.ea, err, "lwarx"); + break; + case 8: + __get_user_asmx(val, op.ea, err, "ldarx"); + break; + default: + return 0; + } + if (!err) + regs->gpr[op.reg] = val; + goto ldst_done; + + case STCX: + if (regs->msr & MSR_LE) + return 0; + if (op.ea & (size - 1)) + break; /* can't handle misaligned */ + err = -EFAULT; + if (!address_ok(regs, op.ea, size)) + goto ldst_done; + err = 0; + switch (size) { + case 4: + __put_user_asmx(op.val, op.ea, err, "stwcx.", cr); + break; + case 8: + __put_user_asmx(op.val, op.ea, err, "stdcx.", cr); + break; + default: + return 0; + } + if (!err) + regs->ccr = (regs->ccr & 0x0fffffff) | + (cr & 0xe0000000) | + ((regs->xer >> 3) & 0x10000000); + goto ldst_done; + + case LOAD: + if (regs->msr & MSR_LE) + return 0; + err = read_mem(®s->gpr[op.reg], op.ea, size, regs); + if (!err) { + if (op.type & SIGNEXT) + do_signext(®s->gpr[op.reg], size); + if (op.type & BYTEREV) + do_byterev(®s->gpr[op.reg], size); + } + goto ldst_done; + + case LOAD_FP: + if (regs->msr & MSR_LE) + return 0; + if (size == 4) + err = do_fp_load(op.reg, do_lfs, op.ea, size, regs); + else + err = do_fp_load(op.reg, do_lfd, op.ea, size, regs); + goto ldst_done; + +#ifdef CONFIG_ALTIVEC + case LOAD_VMX: + if (regs->msr & MSR_LE) + return 0; + err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs); + goto ldst_done; +#endif +#ifdef CONFIG_VSX + case LOAD_VSX: + if (regs->msr & MSR_LE) + return 0; + err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs); + goto ldst_done; +#endif + case LOAD_MULTI: + if (regs->msr & MSR_LE) + return 0; + rd = op.reg; + for (i = 0; i < size; i += 4) { + nb = size - i; + if (nb > 4) + nb = 4; + err = read_mem(®s->gpr[rd], op.ea, nb, regs); + if (err) + return 0; + if (nb < 4) /* left-justify last bytes */ + regs->gpr[rd] <<= 32 - 8 * nb; + op.ea += 4; + ++rd; + } + goto instr_done; + + case STORE: + if (regs->msr & MSR_LE) + return 0; + if ((op.type & UPDATE) && size == sizeof(long) && + op.reg == 1 && op.update_reg == 1 && + !(regs->msr & MSR_PR) && + op.ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) { + err = handle_stack_update(op.ea, regs); + goto ldst_done; + } + err = write_mem(op.val, op.ea, size, regs); + goto ldst_done; + + case STORE_FP: + if (regs->msr & MSR_LE) + return 0; + if (size == 4) + err = do_fp_store(op.reg, do_stfs, op.ea, size, regs); + else + err = do_fp_store(op.reg, do_stfd, op.ea, size, regs); + goto ldst_done; + +#ifdef CONFIG_ALTIVEC + case STORE_VMX: + if (regs->msr & MSR_LE) + return 0; + err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs); + goto ldst_done; +#endif +#ifdef CONFIG_VSX + case STORE_VSX: + if (regs->msr & MSR_LE) + return 0; + err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs); + goto ldst_done; +#endif + case STORE_MULTI: + if (regs->msr & MSR_LE) + return 0; + rd = op.reg; + for (i = 0; i < size; i += 4) { + val = regs->gpr[rd]; + nb = size - i; + if (nb > 4) + nb = 4; + else + val >>= 32 - 8 * nb; + err = write_mem(val, op.ea, nb, regs); + if (err) + return 0; + op.ea += 4; + ++rd; + } + goto instr_done; + + case MFMSR: + regs->gpr[op.reg] = regs->msr & MSR_MASK; + goto instr_done; + + case MTMSR: + val = regs->gpr[op.reg]; + if ((val & MSR_RI) == 0) + /* can't step mtmsr[d] that would clear MSR_RI */ + return -1; + /* here op.val is the mask of bits to change */ + regs->msr = (regs->msr & ~op.val) | (val & op.val); + goto instr_done; + +#ifdef CONFIG_PPC64 + case SYSCALL: /* sc */ + /* + * N.B. this uses knowledge about how the syscall + * entry code works. If that is changed, this will + * need to be changed also. + */ + if (regs->gpr[0] == 0x1ebe && + cpu_has_feature(CPU_FTR_REAL_LE)) { + regs->msr ^= MSR_LE; + goto instr_done; + } + regs->gpr[9] = regs->gpr[13]; + regs->gpr[10] = MSR_KERNEL; + regs->gpr[11] = regs->nip + 4; + regs->gpr[12] = regs->msr & MSR_MASK; + regs->gpr[13] = (unsigned long) get_paca(); + regs->nip = (unsigned long) &system_call_common; + regs->msr = MSR_KERNEL; + return 1; + + case RFI: + return -1; +#endif + } + return 0; + + ldst_done: + if (err) + return 0; + if (op.type & UPDATE) + regs->gpr[op.update_reg] = op.ea; + + instr_done: + regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + return 1; } diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index d0130fff20e5..325e861616a1 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -34,3 +34,4 @@ obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_HIGHMEM) += highmem.o +obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o diff --git a/arch/powerpc/platforms/cell/spu_fault.c b/arch/powerpc/mm/copro_fault.c index 641e7273d75a..0f9939e693df 100644 --- a/arch/powerpc/platforms/cell/spu_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -1,5 +1,5 @@ /* - * SPU mm fault handler + * CoProcessor (SPU/AFU) mm fault handler * * (C) Copyright IBM Deutschland Entwicklung GmbH 2007 * @@ -23,16 +23,17 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/export.h> - +#include <asm/reg.h> +#include <asm/copro.h> #include <asm/spu.h> -#include <asm/spu_csa.h> +#include <misc/cxl.h> /* * This ought to be kept in sync with the powerpc specific do_page_fault * function. Currently, there are a few corner cases that we haven't had * to handle fortunately. */ -int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, +int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr, unsigned *flt) { struct vm_area_struct *vma; @@ -58,12 +59,12 @@ int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, goto out_unlock; } - is_write = dsisr & MFC_DSISR_ACCESS_PUT; + is_write = dsisr & DSISR_ISSTORE; if (is_write) { if (!(vma->vm_flags & VM_WRITE)) goto out_unlock; } else { - if (dsisr & MFC_DSISR_ACCESS_DENIED) + if (dsisr & DSISR_PROTFAULT) goto out_unlock; if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto out_unlock; @@ -91,4 +92,58 @@ out_unlock: up_read(&mm->mmap_sem); return ret; } -EXPORT_SYMBOL_GPL(spu_handle_mm_fault); +EXPORT_SYMBOL_GPL(copro_handle_mm_fault); + +int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) +{ + u64 vsid; + int psize, ssize; + + slb->esid = (ea & ESID_MASK) | SLB_ESID_V; + + switch (REGION_ID(ea)) { + case USER_REGION_ID: + pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea); + psize = get_slice_psize(mm, ea); + ssize = user_segment_size(ea); + vsid = get_vsid(mm->context.id, ea, ssize); + break; + case VMALLOC_REGION_ID: + pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea); + if (ea < VMALLOC_END) + psize = mmu_vmalloc_psize; + else + psize = mmu_io_psize; + ssize = mmu_kernel_ssize; + vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + break; + case KERNEL_REGION_ID: + pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea); + psize = mmu_linear_psize; + ssize = mmu_kernel_ssize; + vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + break; + default: + pr_debug("%s: invalid region access at %016llx\n", __func__, ea); + return 1; + } + + vsid = (vsid << slb_vsid_shift(ssize)) | SLB_VSID_USER; + + vsid |= mmu_psize_defs[psize].sllp | + ((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0); + + slb->vsid = vsid; + + return 0; +} +EXPORT_SYMBOL_GPL(copro_calculate_slb); + +void copro_flush_all_slbs(struct mm_struct *mm) +{ +#ifdef CONFIG_SPU_BASE + spu_flush_all_slbs(mm); +#endif + cxl_slbia(mm); +} +EXPORT_SYMBOL_GPL(copro_flush_all_slbs); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 51ab9e7e6c39..08d659a9fcdb 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -30,9 +30,9 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/perf_event.h> -#include <linux/magic.h> #include <linux/ratelimit.h> #include <linux/context_tracking.h> +#include <linux/hugetlb.h> #include <asm/firmware.h> #include <asm/page.h> @@ -114,22 +114,37 @@ static int store_updates_sp(struct pt_regs *regs) #define MM_FAULT_CONTINUE -1 #define MM_FAULT_ERR(sig) (sig) -static int do_sigbus(struct pt_regs *regs, unsigned long address) +static int do_sigbus(struct pt_regs *regs, unsigned long address, + unsigned int fault) { siginfo_t info; + unsigned int lsb = 0; up_read(¤t->mm->mmap_sem); - if (user_mode(regs)) { - current->thread.trap_nr = BUS_ADRERR; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, current); - return MM_FAULT_RETURN; + if (!user_mode(regs)) + return MM_FAULT_ERR(SIGBUS); + + current->thread.trap_nr = BUS_ADRERR; + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; +#ifdef CONFIG_MEMORY_FAILURE + if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { + pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", + current->comm, current->pid, address); + info.si_code = BUS_MCEERR_AR; } - return MM_FAULT_ERR(SIGBUS); + + if (fault & VM_FAULT_HWPOISON_LARGE) + lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); + if (fault & VM_FAULT_HWPOISON) + lsb = PAGE_SHIFT; +#endif + info.si_addr_lsb = lsb; + force_sig_info(SIGBUS, &info, current); + return MM_FAULT_RETURN; } static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) @@ -170,11 +185,8 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) return MM_FAULT_RETURN; } - /* Bus error. x86 handles HWPOISON here, we'll add this if/when - * we support the feature in HW - */ - if (fault & VM_FAULT_SIGBUS) - return do_sigbus(regs, addr); + if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) + return do_sigbus(regs, addr, fault); /* We don't understand the fault code, this is fatal */ BUG(); @@ -508,7 +520,6 @@ bail: void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) { const struct exception_table_entry *entry; - unsigned long *stackend; /* Are we prepared to handle this fault? */ if ((entry = search_exception_tables(regs->nip)) != NULL) { @@ -537,8 +548,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n", regs->nip); - stackend = end_of_stack(current); - if (current != &init_task && *stackend != STACK_END_MAGIC) + if (task_stack_end_corrupted(current)) printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); die("Kernel access of bad area", regs, sig); diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index afc0a8295f84..ae4962a06476 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -29,6 +29,8 @@ #include <asm/kexec.h> #include <asm/ppc-opcode.h> +#include <misc/cxl.h> + #ifdef DEBUG_LOW #define DBG_LOW(fmt...) udbg_printf(fmt) #else @@ -149,9 +151,11 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) static inline void tlbie(unsigned long vpn, int psize, int apsize, int ssize, int local) { - unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL); + unsigned int use_local; int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use(); + if (use_local) use_local = mmu_psize_defs[psize].tlbiel; if (lock_tlbie && !use_local) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index daee7f4e5a14..d5339a3b9945 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -51,7 +51,7 @@ #include <asm/cacheflush.h> #include <asm/cputable.h> #include <asm/sections.h> -#include <asm/spu.h> +#include <asm/copro.h> #include <asm/udbg.h> #include <asm/code-patching.h> #include <asm/fadump.h> @@ -92,12 +92,14 @@ extern unsigned long dart_tablebase; static unsigned long _SDR1; struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; +EXPORT_SYMBOL_GPL(mmu_psize_defs); struct hash_pte *htab_address; unsigned long htab_size_bytes; unsigned long htab_hash_mask; EXPORT_SYMBOL_GPL(htab_hash_mask); int mmu_linear_psize = MMU_PAGE_4K; +EXPORT_SYMBOL_GPL(mmu_linear_psize); int mmu_virtual_psize = MMU_PAGE_4K; int mmu_vmalloc_psize = MMU_PAGE_4K; #ifdef CONFIG_SPARSEMEM_VMEMMAP @@ -105,6 +107,7 @@ int mmu_vmemmap_psize = MMU_PAGE_4K; #endif int mmu_io_psize = MMU_PAGE_4K; int mmu_kernel_ssize = MMU_SEGSIZE_256M; +EXPORT_SYMBOL_GPL(mmu_kernel_ssize); int mmu_highuser_ssize = MMU_SEGSIZE_256M; u16 mmu_slb_size = 64; EXPORT_SYMBOL_GPL(mmu_slb_size); @@ -333,70 +336,69 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, return 0; prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size); - if (prop != NULL) { - pr_info("Page sizes from device-tree:\n"); - size /= 4; - cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); - while(size > 0) { - unsigned int base_shift = be32_to_cpu(prop[0]); - unsigned int slbenc = be32_to_cpu(prop[1]); - unsigned int lpnum = be32_to_cpu(prop[2]); - struct mmu_psize_def *def; - int idx, base_idx; - - size -= 3; prop += 3; - base_idx = get_idx_from_shift(base_shift); - if (base_idx < 0) { - /* - * skip the pte encoding also - */ - prop += lpnum * 2; size -= lpnum * 2; + if (!prop) + return 0; + + pr_info("Page sizes from device-tree:\n"); + size /= 4; + cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); + while(size > 0) { + unsigned int base_shift = be32_to_cpu(prop[0]); + unsigned int slbenc = be32_to_cpu(prop[1]); + unsigned int lpnum = be32_to_cpu(prop[2]); + struct mmu_psize_def *def; + int idx, base_idx; + + size -= 3; prop += 3; + base_idx = get_idx_from_shift(base_shift); + if (base_idx < 0) { + /* skip the pte encoding also */ + prop += lpnum * 2; size -= lpnum * 2; + continue; + } + def = &mmu_psize_defs[base_idx]; + if (base_idx == MMU_PAGE_16M) + cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; + + def->shift = base_shift; + if (base_shift <= 23) + def->avpnm = 0; + else + def->avpnm = (1 << (base_shift - 23)) - 1; + def->sllp = slbenc; + /* + * We don't know for sure what's up with tlbiel, so + * for now we only set it for 4K and 64K pages + */ + if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K) + def->tlbiel = 1; + else + def->tlbiel = 0; + + while (size > 0 && lpnum) { + unsigned int shift = be32_to_cpu(prop[0]); + int penc = be32_to_cpu(prop[1]); + + prop += 2; size -= 2; + lpnum--; + + idx = get_idx_from_shift(shift); + if (idx < 0) continue; - } - def = &mmu_psize_defs[base_idx]; - if (base_idx == MMU_PAGE_16M) - cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; - - def->shift = base_shift; - if (base_shift <= 23) - def->avpnm = 0; - else - def->avpnm = (1 << (base_shift - 23)) - 1; - def->sllp = slbenc; - /* - * We don't know for sure what's up with tlbiel, so - * for now we only set it for 4K and 64K pages - */ - if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K) - def->tlbiel = 1; - else - def->tlbiel = 0; - - while (size > 0 && lpnum) { - unsigned int shift = be32_to_cpu(prop[0]); - int penc = be32_to_cpu(prop[1]); - - prop += 2; size -= 2; - lpnum--; - - idx = get_idx_from_shift(shift); - if (idx < 0) - continue; - - if (penc == -1) - pr_err("Invalid penc for base_shift=%d " - "shift=%d\n", base_shift, shift); - - def->penc[idx] = penc; - pr_info("base_shift=%d: shift=%d, sllp=0x%04lx," - " avpnm=0x%08lx, tlbiel=%d, penc=%d\n", - base_shift, shift, def->sllp, - def->avpnm, def->tlbiel, def->penc[idx]); - } + + if (penc == -1) + pr_err("Invalid penc for base_shift=%d " + "shift=%d\n", base_shift, shift); + + def->penc[idx] = penc; + pr_info("base_shift=%d: shift=%d, sllp=0x%04lx," + " avpnm=0x%08lx, tlbiel=%d, penc=%d\n", + base_shift, shift, def->sllp, + def->avpnm, def->tlbiel, def->penc[idx]); } - return 1; } - return 0; + + return 1; } #ifdef CONFIG_HUGETLB_PAGE @@ -867,7 +869,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) } #ifdef CONFIG_PPC_MM_SLICES -unsigned int get_paca_psize(unsigned long addr) +static unsigned int get_paca_psize(unsigned long addr) { u64 lpsizes; unsigned char *hpsizes; @@ -901,10 +903,8 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) if (get_slice_psize(mm, addr) == MMU_PAGE_4K) return; slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif - if (get_paca_psize(addr) != MMU_PAGE_4K) { + copro_flush_all_slbs(mm); + if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { get_paca()->context = mm->context; slb_flush_and_rebolt(); } @@ -989,12 +989,11 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, * -1 - critical hash insertion error * -2 - access not permitted by subpage protection mechanism */ -int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) { enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; unsigned long vsid; - struct mm_struct *mm; pte_t *ptep; unsigned hugeshift; const struct cpumask *tmp; @@ -1008,7 +1007,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) switch (REGION_ID(ea)) { case USER_REGION_ID: user_region = 1; - mm = current->mm; if (! mm) { DBG_LOW(" user region with no mm !\n"); rc = 1; @@ -1019,7 +1017,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) vsid = get_vsid(mm->context.id, ea, ssize); break; case VMALLOC_REGION_ID: - mm = &init_mm; vsid = get_kernel_vsid(ea, mmu_kernel_ssize); if (ea < VMALLOC_END) psize = mmu_vmalloc_psize; @@ -1104,7 +1101,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) WARN_ON(1); } #endif - check_paca_psize(ea, mm, psize, user_region); + if (current->mm == mm) + check_paca_psize(ea, mm, psize, user_region); goto bail; } @@ -1141,13 +1139,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) "to 4kB pages because of " "non-cacheable mapping\n"); psize = mmu_vmalloc_psize = MMU_PAGE_4K; -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif + copro_flush_all_slbs(mm); } } - check_paca_psize(ea, mm, psize, user_region); + if (current->mm == mm) + check_paca_psize(ea, mm, psize, user_region); #endif /* CONFIG_PPC_64K_PAGES */ #ifdef CONFIG_PPC_HAS_HASH_64K @@ -1182,6 +1179,17 @@ bail: exception_exit(prev_state); return rc; } +EXPORT_SYMBOL_GPL(hash_page_mm); + +int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +{ + struct mm_struct *mm = current->mm; + + if (REGION_ID(ea) == VMALLOC_REGION_ID) + mm = &init_mm; + + return hash_page_mm(mm, ea, access, trap); +} EXPORT_SYMBOL_GPL(hash_page); void hash_preload(struct mm_struct *mm, unsigned long ea, diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index cff59f1bec23..cad68ff8eca5 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -106,11 +106,11 @@ unsigned long __max_low_memory = MAX_LOW_MEM; void MMU_setup(void) { /* Check for nobats option (used in mapin_ram). */ - if (strstr(cmd_line, "nobats")) { + if (strstr(boot_command_line, "nobats")) { __map_without_bats = 1; } - if (strstr(cmd_line, "noltlbs")) { + if (strstr(boot_command_line, "noltlbs")) { __map_without_ltlbs = 1; } #ifdef CONFIG_DEBUG_PAGEALLOC diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 253b4b971c8a..3481556a1880 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -233,9 +233,6 @@ static void __meminit vmemmap_create_mapping(unsigned long start, } #ifdef CONFIG_MEMORY_HOTPLUG -extern int htab_remove_mapping(unsigned long vstart, unsigned long vend, - int psize, int ssize); - static void vmemmap_remove_mapping(unsigned long start, unsigned long page_size) { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index e0f7a189c48e..8ebaac75c940 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -260,6 +260,60 @@ static int __init mark_nonram_nosave(void) } return 0; } +#else /* CONFIG_NEED_MULTIPLE_NODES */ +static int __init mark_nonram_nosave(void) +{ + return 0; +} +#endif + +static bool zone_limits_final; + +static unsigned long max_zone_pfns[MAX_NR_ZONES] = { + [0 ... MAX_NR_ZONES - 1] = ~0UL +}; + +/* + * Restrict the specified zone and all more restrictive zones + * to be below the specified pfn. May not be called after + * paging_init(). + */ +void __init limit_zone_pfn(enum zone_type zone, unsigned long pfn_limit) +{ + int i; + + if (WARN_ON(zone_limits_final)) + return; + + for (i = zone; i >= 0; i--) { + if (max_zone_pfns[i] > pfn_limit) + max_zone_pfns[i] = pfn_limit; + } +} + +/* + * Find the least restrictive zone that is entirely below the + * specified pfn limit. Returns < 0 if no suitable zone is found. + * + * pfn_limit must be u64 because it can exceed 32 bits even on 32-bit + * systems -- the DMA limit can be higher than any possible real pfn. + */ +int dma_pfn_limit_to_zone(u64 pfn_limit) +{ + enum zone_type top_zone = ZONE_NORMAL; + int i; + +#ifdef CONFIG_HIGHMEM + top_zone = ZONE_HIGHMEM; +#endif + + for (i = top_zone; i >= 0; i--) { + if (max_zone_pfns[i] <= pfn_limit) + return i; + } + + return -EPERM; +} /* * paging_init() sets up the page tables - in fact we've already done this. @@ -268,7 +322,7 @@ void __init paging_init(void) { unsigned long long total_ram = memblock_phys_mem_size(); phys_addr_t top_of_ram = memblock_end_of_DRAM(); - unsigned long max_zone_pfns[MAX_NR_ZONES]; + enum zone_type top_zone; #ifdef CONFIG_PPC32 unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1); @@ -290,18 +344,20 @@ void __init paging_init(void) (unsigned long long)top_of_ram, total_ram); printk(KERN_DEBUG "Memory hole size: %ldMB\n", (long int)((top_of_ram - total_ram) >> 20)); - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + #ifdef CONFIG_HIGHMEM - max_zone_pfns[ZONE_DMA] = lowmem_end_addr >> PAGE_SHIFT; - max_zone_pfns[ZONE_HIGHMEM] = top_of_ram >> PAGE_SHIFT; + top_zone = ZONE_HIGHMEM; + limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT); #else - max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; + top_zone = ZONE_NORMAL; #endif + + limit_zone_pfn(top_zone, top_of_ram >> PAGE_SHIFT); + zone_limits_final = true; free_area_init_nodes(max_zone_pfns); mark_nonram_nosave(); } -#endif /* ! CONFIG_NEED_MULTIPLE_NODES */ static void __init register_page_bootmem_info(void) { diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index d7737a542fd7..649666d5d1c2 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -538,7 +538,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, */ static int numa_setup_cpu(unsigned long lcpu) { - int nid; + int nid = -1; struct device_node *cpu; /* @@ -555,19 +555,21 @@ static int numa_setup_cpu(unsigned long lcpu) if (!cpu) { WARN_ON(1); - nid = 0; - goto out; + if (cpu_present(lcpu)) + goto out_present; + else + goto out; } nid = of_node_to_nid_single(cpu); +out_present: if (nid < 0 || !node_online(nid)) nid = first_online_node; -out: - map_cpu_to_node(lcpu, nid); + map_cpu_to_node(lcpu, nid); of_node_put(cpu); - +out: return nid; } @@ -1127,20 +1129,11 @@ void __init do_init_bootmem(void) * even before we online them, so that we can use cpu_to_{node,mem} * early in boot, cf. smp_prepare_cpus(). */ - for_each_possible_cpu(cpu) { - cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, - (void *)(unsigned long)cpu); + for_each_present_cpu(cpu) { + numa_setup_cpu((unsigned long)cpu); } } -void __init paging_init(void) -{ - unsigned long max_zone_pfns[MAX_NR_ZONES]; - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT; - free_area_init_nodes(max_zone_pfns); -} - static int __init early_numa(char *p) { if (!p) diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index c695943a513c..c90e602677c9 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -48,7 +48,7 @@ static inline int pte_looks_normal(pte_t pte) (_PAGE_PRESENT | _PAGE_USER); } -struct page * maybe_pte_to_page(pte_t pte) +static struct page *maybe_pte_to_page(pte_t pte) { unsigned long pfn = pte_pfn(pte); struct page *page; diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 0399a6702958..6e450ca66526 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -46,9 +46,6 @@ static inline unsigned long mk_esid_data(unsigned long ea, int ssize, return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot; } -#define slb_vsid_shift(ssize) \ - ((ssize) == MMU_SEGSIZE_256M? SLB_VSID_SHIFT: SLB_VSID_SHIFT_1T) - static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, unsigned long flags) { diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index b0c75cc15efc..8d7bda94d196 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -30,9 +30,11 @@ #include <linux/err.h> #include <linux/spinlock.h> #include <linux/export.h> +#include <linux/hugetlb.h> #include <asm/mman.h> #include <asm/mmu.h> -#include <asm/spu.h> +#include <asm/copro.h> +#include <asm/hugetlb.h> /* some sanity checks */ #if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE @@ -232,9 +234,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz spin_unlock_irqrestore(&slice_convert_lock, flags); -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif + copro_flush_all_slbs(mm); } /* @@ -671,9 +671,7 @@ void slice_set_psize(struct mm_struct *mm, unsigned long address, spin_unlock_irqrestore(&slice_convert_lock, flags); -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif + copro_flush_all_slbs(mm); } void slice_set_range_psize(struct mm_struct *mm, unsigned long start, diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index f75301f2c85f..6adf55fa5d88 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -12,6 +12,7 @@ #include <asm/processor.h> #include <asm/uaccess.h> #include <asm/compat.h> +#include <asm/oprofile_impl.h> #define STACK_SP(STACK) *(STACK) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index b7cd00b0171e..a6995d4e93d4 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -59,9 +59,9 @@ struct cpu_hw_events { struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES]; }; -DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); -struct power_pmu *ppmu; +static struct power_pmu *ppmu; /* * Normally, to ignore kernel events we set the FCS (freeze counters @@ -124,7 +124,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) static inline void power_pmu_bhrb_enable(struct perf_event *event) {} static inline void power_pmu_bhrb_disable(struct perf_event *event) {} -void power_pmu_flush_branch_stack(void) {} +static void power_pmu_flush_branch_stack(void) {} static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} static void pmao_restore_workaround(bool ebb) { } #endif /* CONFIG_PPC32 */ @@ -375,7 +375,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event) /* Called from ctxsw to prevent one process's branch entries to * mingle with the other process's entries during context switch. */ -void power_pmu_flush_branch_stack(void) +static void power_pmu_flush_branch_stack(void) { if (ppmu->bhrb_nr) power_pmu_bhrb_reset(); @@ -408,7 +408,7 @@ static __u64 power_pmu_bhrb_to(u64 addr) } /* Processing BHRB entries */ -void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) +static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) { u64 val; u64 addr; @@ -1573,7 +1573,7 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -void power_pmu_start_txn(struct pmu *pmu) +static void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -1587,7 +1587,7 @@ void power_pmu_start_txn(struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -void power_pmu_cancel_txn(struct pmu *pmu) +static void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -1600,7 +1600,7 @@ void power_pmu_cancel_txn(struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -int power_pmu_commit_txn(struct pmu *pmu) +static int power_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw; long i, n; @@ -1888,7 +1888,7 @@ ssize_t power_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%02llx\n", pmu_attr->id); } -struct pmu power_pmu = { +static struct pmu power_pmu = { .pmu_enable = power_pmu_enable, .pmu_disable = power_pmu_disable, .event_init = power_pmu_event_init, diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 70d4f748b54b..6c8710dd90c9 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -75,86 +75,6 @@ static struct attribute_group format_group = { static struct kmem_cache *hv_page_cache; -/* - * read_offset_data - copy data from one buffer to another while treating the - * source buffer as a small view on the total avaliable - * source data. - * - * @dest: buffer to copy into - * @dest_len: length of @dest in bytes - * @requested_offset: the offset within the source data we want. Must be > 0 - * @src: buffer to copy data from - * @src_len: length of @src in bytes - * @source_offset: the offset in the sorce data that (src,src_len) refers to. - * Must be > 0 - * - * returns the number of bytes copied. - * - * The following ascii art shows the various buffer possitioning we need to - * handle, assigns some arbitrary varibles to points on the buffer, and then - * shows how we fiddle with those values to get things we care about (copy - * start in src and copy len) - * - * s = @src buffer - * d = @dest buffer - * '.' areas in d are written to. - * - * u - * x w v z - * d |.........| - * s |----------------------| - * - * u - * x w z v - * d |........------| - * s |------------------| - * - * x w u,z,v - * d |........| - * s |------------------| - * - * x,w u,v,z - * d |..................| - * s |------------------| - * - * x u - * w v z - * d |........| - * s |------------------| - * - * x z w v - * d |------| - * s |------| - * - * x = source_offset - * w = requested_offset - * z = source_offset + src_len - * v = requested_offset + dest_len - * - * w_offset_in_s = w - x = requested_offset - source_offset - * z_offset_in_s = z - x = src_len - * v_offset_in_s = v - x = request_offset + dest_len - src_len - */ -static ssize_t read_offset_data(void *dest, size_t dest_len, - loff_t requested_offset, void *src, - size_t src_len, loff_t source_offset) -{ - size_t w_offset_in_s = requested_offset - source_offset; - size_t z_offset_in_s = src_len; - size_t v_offset_in_s = requested_offset + dest_len - src_len; - size_t u_offset_in_s = min(z_offset_in_s, v_offset_in_s); - size_t copy_len = u_offset_in_s - w_offset_in_s; - - if (requested_offset < 0 || source_offset < 0) - return -EINVAL; - - if (z_offset_in_s <= w_offset_in_s) - return 0; - - memcpy(dest, src + w_offset_in_s, copy_len); - return copy_len; -} - static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096, unsigned long version, unsigned long index) @@ -183,8 +103,10 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj, { unsigned long hret; ssize_t ret = 0; - size_t catalog_len = 0, catalog_page_len = 0, page_count = 0; + size_t catalog_len = 0, catalog_page_len = 0; loff_t page_offset = 0; + loff_t offset_in_page; + size_t copy_len; uint64_t catalog_version_num = 0; void *page = kmem_cache_alloc(hv_page_cache, GFP_USER); struct hv_24x7_catalog_page_0 *page_0 = page; @@ -202,7 +124,7 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj, catalog_len = catalog_page_len * 4096; page_offset = offset / 4096; - page_count = count / 4096; + offset_in_page = offset % 4096; if (page_offset >= catalog_page_len) goto e_free; @@ -216,8 +138,13 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj, } } - ret = read_offset_data(buf, count, offset, - page, 4096, page_offset * 4096); + copy_len = 4096 - offset_in_page; + if (copy_len > count) + copy_len = count; + + memcpy(buf, page+offset_in_page, copy_len); + ret = copy_len; + e_free: if (hret) pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:" @@ -225,9 +152,9 @@ e_free: catalog_version_num, page_offset, hret); kmem_cache_free(hv_page_cache, page); - pr_devel("catalog_read: offset=%lld(%lld) count=%zu(%zu) catalog_len=%zu(%zu) => %zd\n", - offset, page_offset, count, page_count, catalog_len, - catalog_page_len, ret); + pr_devel("catalog_read: offset=%lld(%lld) count=%zu " + "catalog_len=%zu(%zu) => %zd\n", offset, page_offset, + count, catalog_len, catalog_page_len, ret); return ret; } @@ -294,7 +221,7 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, u16 lpar, u64 *res, bool success_expected) { - unsigned long ret; + unsigned long ret = -ENOMEM; /* * request_buffer and result_buffer are not required to be 4k aligned, @@ -304,7 +231,27 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, struct reqb { struct hv_24x7_request_buffer buf; struct hv_24x7_request req; - } __packed __aligned(4096) request_buffer = { + } __packed *request_buffer; + + struct { + struct hv_24x7_data_result_buffer buf; + struct hv_24x7_result res; + struct hv_24x7_result_element elem; + __be64 result; + } __packed *result_buffer; + + BUILD_BUG_ON(sizeof(*request_buffer) > 4096); + BUILD_BUG_ON(sizeof(*result_buffer) > 4096); + + request_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER); + if (!request_buffer) + goto out; + + result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER); + if (!result_buffer) + goto out_free_request_buffer; + + *request_buffer = (struct reqb) { .buf = { .interface_version = HV_24X7_IF_VERSION_CURRENT, .num_requests = 1, @@ -320,28 +267,27 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, } }; - struct resb { - struct hv_24x7_data_result_buffer buf; - struct hv_24x7_result res; - struct hv_24x7_result_element elem; - __be64 result; - } __packed __aligned(4096) result_buffer = {}; - ret = plpar_hcall_norets(H_GET_24X7_DATA, - virt_to_phys(&request_buffer), sizeof(request_buffer), - virt_to_phys(&result_buffer), sizeof(result_buffer)); + virt_to_phys(request_buffer), sizeof(*request_buffer), + virt_to_phys(result_buffer), sizeof(*result_buffer)); if (ret) { if (success_expected) - pr_err_ratelimited("hcall failed: %d %#x %#x %d => 0x%lx (%ld) detail=0x%x failing ix=%x\n", - domain, offset, ix, lpar, - ret, ret, - result_buffer.buf.detailed_rc, - result_buffer.buf.failing_request_ix); - return ret; + pr_err_ratelimited("hcall failed: %d %#x %#x %d => " + "0x%lx (%ld) detail=0x%x failing ix=%x\n", + domain, offset, ix, lpar, ret, ret, + result_buffer->buf.detailed_rc, + result_buffer->buf.failing_request_ix); + goto out_free_result_buffer; } - *res = be64_to_cpu(result_buffer.result); + *res = be64_to_cpu(result_buffer->result); + +out_free_result_buffer: + kfree(result_buffer); +out_free_request_buffer: + kfree(request_buffer); +out: return ret; } diff --git a/arch/powerpc/platforms/40x/ep405.c b/arch/powerpc/platforms/40x/ep405.c index b0389bbe4f94..ddc12a1926ef 100644 --- a/arch/powerpc/platforms/40x/ep405.c +++ b/arch/powerpc/platforms/40x/ep405.c @@ -49,7 +49,7 @@ static void __iomem *bcsr_regs; /* there's more, can't be bothered typing them tho */ -static __initdata struct of_device_id ep405_of_bus[] = { +static const struct of_device_id ep405_of_bus[] __initconst = { { .compatible = "ibm,plb3", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/40x/ppc40x_simple.c b/arch/powerpc/platforms/40x/ppc40x_simple.c index 8f3920e5a046..b0c46375dd95 100644 --- a/arch/powerpc/platforms/40x/ppc40x_simple.c +++ b/arch/powerpc/platforms/40x/ppc40x_simple.c @@ -24,7 +24,7 @@ #include <linux/init.h> #include <linux/of_platform.h> -static __initdata struct of_device_id ppc40x_of_bus[] = { +static const struct of_device_id ppc40x_of_bus[] __initconst = { { .compatible = "ibm,plb3", }, { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, diff --git a/arch/powerpc/platforms/40x/virtex.c b/arch/powerpc/platforms/40x/virtex.c index d0fc6866b00c..9aa7ae2f4164 100644 --- a/arch/powerpc/platforms/40x/virtex.c +++ b/arch/powerpc/platforms/40x/virtex.c @@ -17,7 +17,7 @@ #include <asm/xilinx_pci.h> #include <asm/ppc4xx.h> -static struct of_device_id xilinx_of_bus_ids[] __initdata = { +static const struct of_device_id xilinx_of_bus_ids[] __initconst = { { .compatible = "xlnx,plb-v46-1.00.a", }, { .compatible = "xlnx,plb-v34-1.01.a", }, { .compatible = "xlnx,plb-v34-1.02.a", }, diff --git a/arch/powerpc/platforms/40x/walnut.c b/arch/powerpc/platforms/40x/walnut.c index 8b691df72f74..f7ac2d0fcb44 100644 --- a/arch/powerpc/platforms/40x/walnut.c +++ b/arch/powerpc/platforms/40x/walnut.c @@ -28,7 +28,7 @@ #include <asm/pci-bridge.h> #include <asm/ppc4xx.h> -static __initdata struct of_device_id walnut_of_bus[] = { +static const struct of_device_id walnut_of_bus[] __initconst = { { .compatible = "ibm,plb3", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 4d88f6a19058..82f2da28cd27 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -215,9 +215,9 @@ config AKEBONO select NET_VENDOR_IBM select IBM_EMAC_EMAC4 select IBM_EMAC_RGMII_WOL - select USB - select USB_OHCI_HCD_PLATFORM - select USB_EHCI_HCD_PLATFORM + select USB if USB_SUPPORT + select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD + select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD select MMC_SDHCI select MMC_SDHCI_PLTFM select MMC_SDHCI_OF_476GTR diff --git a/arch/powerpc/platforms/44x/canyonlands.c b/arch/powerpc/platforms/44x/canyonlands.c index e300dd4c89bf..22ca5430c9cb 100644 --- a/arch/powerpc/platforms/44x/canyonlands.c +++ b/arch/powerpc/platforms/44x/canyonlands.c @@ -33,7 +33,7 @@ #define BCSR_USB_EN 0x11 -static __initdata struct of_device_id ppc460ex_of_bus[] = { +static const struct of_device_id ppc460ex_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/ebony.c b/arch/powerpc/platforms/44x/ebony.c index 6a4232bbdf88..ae893226392d 100644 --- a/arch/powerpc/platforms/44x/ebony.c +++ b/arch/powerpc/platforms/44x/ebony.c @@ -28,7 +28,7 @@ #include <asm/pci-bridge.h> #include <asm/ppc4xx.h> -static __initdata struct of_device_id ebony_of_bus[] = { +static const struct of_device_id ebony_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c index 4241bc825800..c7c6758b3cfe 100644 --- a/arch/powerpc/platforms/44x/iss4xx.c +++ b/arch/powerpc/platforms/44x/iss4xx.c @@ -32,7 +32,7 @@ #include <asm/mpic.h> #include <asm/mmu.h> -static __initdata struct of_device_id iss4xx_of_bus[] = { +static const struct of_device_id iss4xx_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,plb6", }, { .compatible = "ibm,opb", }, diff --git a/arch/powerpc/platforms/44x/ppc44x_simple.c b/arch/powerpc/platforms/44x/ppc44x_simple.c index 3ffb915446e3..573c3d2689c6 100644 --- a/arch/powerpc/platforms/44x/ppc44x_simple.c +++ b/arch/powerpc/platforms/44x/ppc44x_simple.c @@ -24,7 +24,7 @@ #include <linux/init.h> #include <linux/of_platform.h> -static __initdata struct of_device_id ppc44x_of_bus[] = { +static const struct of_device_id ppc44x_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c index 33986c1a05da..58db9d083969 100644 --- a/arch/powerpc/platforms/44x/ppc476.c +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -38,7 +38,7 @@ #include <linux/pci.h> #include <linux/i2c.h> -static struct of_device_id ppc47x_of_bus[] __initdata = { +static const struct of_device_id ppc47x_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,plb6", }, { .compatible = "ibm,opb", }, diff --git a/arch/powerpc/platforms/44x/sam440ep.c b/arch/powerpc/platforms/44x/sam440ep.c index 9e09b835758b..3ee4a03c1496 100644 --- a/arch/powerpc/platforms/44x/sam440ep.c +++ b/arch/powerpc/platforms/44x/sam440ep.c @@ -29,7 +29,7 @@ #include <asm/ppc4xx.h> #include <linux/i2c.h> -static __initdata struct of_device_id sam440ep_of_bus[] = { +static const struct of_device_id sam440ep_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/virtex.c b/arch/powerpc/platforms/44x/virtex.c index cf96ccaa760c..ad272c17c640 100644 --- a/arch/powerpc/platforms/44x/virtex.c +++ b/arch/powerpc/platforms/44x/virtex.c @@ -21,7 +21,7 @@ #include <asm/ppc4xx.h> #include "44x.h" -static struct of_device_id xilinx_of_bus_ids[] __initdata = { +static const struct of_device_id xilinx_of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "xlnx,plb-v46-1.00.a", }, { .compatible = "xlnx,plb-v46-1.02.a", }, diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index 3a104284b338..501333cf42cf 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -28,7 +28,7 @@ #include <asm/dma.h> -static __initdata struct of_device_id warp_of_bus[] = { +static const struct of_device_id warp_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index adb95f03d4d4..e996e007bc44 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -337,7 +337,7 @@ void __init mpc512x_init_IRQ(void) /* * Nodes to do bus probe on, soc and localbus */ -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "fsl,mpc5121-immr", }, { .compatible = "fsl,mpc5121-localbus", }, { .compatible = "fsl,mpc5121-mbx", }, diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c index 1843bc932011..7492de3cf6d0 100644 --- a/arch/powerpc/platforms/52xx/lite5200.c +++ b/arch/powerpc/platforms/52xx/lite5200.c @@ -34,13 +34,13 @@ */ /* mpc5200 device tree match tables */ -static struct of_device_id mpc5200_cdm_ids[] __initdata = { +static const struct of_device_id mpc5200_cdm_ids[] __initconst = { { .compatible = "fsl,mpc5200-cdm", }, { .compatible = "mpc5200-cdm", }, {} }; -static struct of_device_id mpc5200_gpio_ids[] __initdata = { +static const struct of_device_id mpc5200_gpio_ids[] __initconst = { { .compatible = "fsl,mpc5200-gpio", }, { .compatible = "mpc5200-gpio", }, {} diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index 070d315dd6cd..32cae33c4266 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -30,7 +30,7 @@ #include <asm/machdep.h> #include <asm/mpc52xx.h> -static struct of_device_id mpc5200_gpio_ids[] __initdata = { +static const struct of_device_id mpc5200_gpio_ids[] __initconst = { { .compatible = "fsl,mpc5200-gpio", }, { .compatible = "mpc5200-gpio", }, {} diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c index d7e94f49532a..26993826a797 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_common.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c @@ -23,12 +23,12 @@ #include <asm/mpc52xx.h> /* MPC5200 device tree match tables */ -static struct of_device_id mpc52xx_xlb_ids[] __initdata = { +static const struct of_device_id mpc52xx_xlb_ids[] __initconst = { { .compatible = "fsl,mpc5200-xlb", }, { .compatible = "mpc5200-xlb", }, {} }; -static struct of_device_id mpc52xx_bus_ids[] __initdata = { +static const struct of_device_id mpc52xx_bus_ids[] __initconst = { { .compatible = "fsl,mpc5200-immr", }, { .compatible = "fsl,mpc5200b-immr", }, { .compatible = "simple-bus", }, @@ -108,21 +108,21 @@ void __init mpc52xx_declare_of_platform_devices(void) /* * match tables used by mpc52xx_map_common_devices() */ -static struct of_device_id mpc52xx_gpt_ids[] __initdata = { +static const struct of_device_id mpc52xx_gpt_ids[] __initconst = { { .compatible = "fsl,mpc5200-gpt", }, { .compatible = "mpc5200-gpt", }, /* old */ {} }; -static struct of_device_id mpc52xx_cdm_ids[] __initdata = { +static const struct of_device_id mpc52xx_cdm_ids[] __initconst = { { .compatible = "fsl,mpc5200-cdm", }, { .compatible = "mpc5200-cdm", }, /* old */ {} }; -static const struct of_device_id mpc52xx_gpio_simple[] = { +static const struct of_device_id mpc52xx_gpio_simple[] __initconst = { { .compatible = "fsl,mpc5200-gpio", }, {} }; -static const struct of_device_id mpc52xx_gpio_wkup[] = { +static const struct of_device_id mpc52xx_gpio_wkup[] __initconst = { { .compatible = "fsl,mpc5200-gpio-wkup", }, {} }; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c index 37f7a89c10f2..f8f0081759fb 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c @@ -564,7 +564,7 @@ static int mpc52xx_lpbfifo_remove(struct platform_device *op) return 0; } -static struct of_device_id mpc52xx_lpbfifo_match[] = { +static const struct of_device_id mpc52xx_lpbfifo_match[] = { { .compatible = "fsl,mpc5200-lpbfifo", }, {}, }; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c index 2898b737deb7..2944bc84b9d6 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -119,12 +119,12 @@ /* MPC5200 device tree match tables */ -static struct of_device_id mpc52xx_pic_ids[] __initdata = { +static const struct of_device_id mpc52xx_pic_ids[] __initconst = { { .compatible = "fsl,mpc5200-pic", }, { .compatible = "mpc5200-pic", }, {} }; -static struct of_device_id mpc52xx_sdma_ids[] __initdata = { +static const struct of_device_id mpc52xx_sdma_ids[] __initconst = { { .compatible = "fsl,mpc5200-bestcomm", }, { .compatible = "mpc5200-bestcomm", }, {} diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index 79799b29ffe2..3d0c3a01143d 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -298,7 +298,7 @@ static void __init ep8248e_setup_arch(void) ppc_md.progress("ep8248e_setup_arch(), finish", 0); } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "fsl,ep8248e-bcsr", }, {}, diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c index 058cc1895c88..387b446f4161 100644 --- a/arch/powerpc/platforms/82xx/km82xx.c +++ b/arch/powerpc/platforms/82xx/km82xx.c @@ -180,7 +180,7 @@ static void __init km82xx_setup_arch(void) ppc_md.progress("km82xx_setup_arch(), finish", 0); } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, {}, }; diff --git a/arch/powerpc/platforms/82xx/mpc8272_ads.c b/arch/powerpc/platforms/82xx/mpc8272_ads.c index 6a14cf50f4a2..d24deacf07d0 100644 --- a/arch/powerpc/platforms/82xx/mpc8272_ads.c +++ b/arch/powerpc/platforms/82xx/mpc8272_ads.c @@ -181,7 +181,7 @@ static void __init mpc8272_ads_setup_arch(void) ppc_md.progress("mpc8272_ads_setup_arch(), finish", 0); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/82xx/pq2fads.c b/arch/powerpc/platforms/82xx/pq2fads.c index e5f82ec8df17..3a5164ad10ad 100644 --- a/arch/powerpc/platforms/82xx/pq2fads.c +++ b/arch/powerpc/platforms/82xx/pq2fads.c @@ -168,7 +168,7 @@ static int __init pq2fads_probe(void) return of_flat_dt_is_compatible(root, "fsl,pq2fads"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index 73997027b085..463fa91ee5b6 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -214,7 +214,7 @@ static const struct i2c_device_id mcu_ids[] = { }; MODULE_DEVICE_TABLE(i2c, mcu_ids); -static struct of_device_id mcu_of_match_table[] = { +static const struct of_device_id mcu_of_match_table[] = { { .compatible = "fsl,mcu-mpc8349emitx", }, { }, }; diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c index 125336f750c6..ef9d01a049c1 100644 --- a/arch/powerpc/platforms/83xx/misc.c +++ b/arch/powerpc/platforms/83xx/misc.c @@ -114,7 +114,7 @@ void __init mpc83xx_ipic_and_qe_init_IRQ(void) } #endif /* CONFIG_QUICC_ENGINE */ -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .type = "soc", }, { .compatible = "soc", }, { .compatible = "simple-bus" }, diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c index a494fa57bdf9..80aea8c4b5a3 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_itx.c +++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c @@ -38,7 +38,7 @@ #include "mpc83xx.h" -static struct of_device_id __initdata mpc834x_itx_ids[] = { +static const struct of_device_id mpc834x_itx_ids[] __initconst = { { .compatible = "fsl,pq2pro-localbus", }, {}, }; diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 4b4c081df94d..eeb80e25214d 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -321,7 +321,7 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = { .end = mpc83xx_suspend_end, }; -static struct of_device_id pmc_match[]; +static const struct of_device_id pmc_match[]; static int pmc_probe(struct platform_device *ofdev) { const struct of_device_id *match; @@ -420,7 +420,7 @@ static struct pmc_type pmc_types[] = { } }; -static struct of_device_id pmc_match[] = { +static const struct of_device_id pmc_match[] = { { .compatible = "fsl,mpc8313-pmc", .data = &pmc_types[0], diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 0c1e6903597e..f22635a71d01 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -276,7 +276,7 @@ config CORENET_GENERIC For 64bit kernel, the following boards are supported: T208x QDS/RDB, T4240 QDS/RDB and B4 QDS The following boards are supported for both 32bit and 64bit kernel: - P5020 DS, P5040 DS and T104xQDS + P5020 DS, P5040 DS and T104xQDS/RDB endif # FSL_SOC_BOOKE diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c index b564b5e23f7c..4a9ad871a168 100644 --- a/arch/powerpc/platforms/85xx/common.c +++ b/arch/powerpc/platforms/85xx/common.c @@ -14,7 +14,7 @@ #include "mpc85xx.h" -static struct of_device_id __initdata mpc85xx_common_ids[] = { +static const struct of_device_id mpc85xx_common_ids[] __initconst = { { .type = "soc", }, { .compatible = "soc", }, { .compatible = "simple-bus", }, diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index d22dd85e50bf..e56b89a792ed 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -20,6 +20,7 @@ #include <asm/time.h> #include <asm/machdep.h> #include <asm/pci-bridge.h> +#include <asm/pgtable.h> #include <asm/ppc-pci.h> #include <mm/mmu_decl.h> #include <asm/prom.h> @@ -67,6 +68,16 @@ void __init corenet_gen_setup_arch(void) swiotlb_detect_4g(); +#if defined(CONFIG_FSL_PCI) && defined(CONFIG_ZONE_DMA32) + /* + * Inbound windows don't cover the full lower 4 GiB + * due to conflicts with PCICSRBAR and outbound windows, + * so limit the DMA32 zone to 2 GiB, to allow consistent + * allocations to succeed. + */ + limit_zone_pfn(ZONE_DMA32, 1UL << (31 - PAGE_SHIFT)); +#endif + pr_info("%s board\n", ppc_md.name); mpc85xx_qe_init(); @@ -129,6 +140,9 @@ static const char * const boards[] __initconst = { "fsl,B4220QDS", "fsl,T1040QDS", "fsl,T1042QDS", + "fsl,T1040RDB", + "fsl,T1042RDB", + "fsl,T1042RDB_PI", "keymile,kmcoge4", NULL }; diff --git a/arch/powerpc/platforms/85xx/ppa8548.c b/arch/powerpc/platforms/85xx/ppa8548.c index 3daff7c63569..12019f17f297 100644 --- a/arch/powerpc/platforms/85xx/ppa8548.c +++ b/arch/powerpc/platforms/85xx/ppa8548.c @@ -59,7 +59,7 @@ static void ppa8548_show_cpuinfo(struct seq_file *m) seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .type = "soc", }, { .compatible = "simple-bus", }, diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c index 7f2673293549..8ad2fe6f200a 100644 --- a/arch/powerpc/platforms/85xx/qemu_e500.c +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -18,6 +18,7 @@ #include <linux/kernel.h> #include <linux/of_fdt.h> #include <asm/machdep.h> +#include <asm/pgtable.h> #include <asm/time.h> #include <asm/udbg.h> #include <asm/mpic.h> @@ -44,6 +45,15 @@ static void __init qemu_e500_setup_arch(void) fsl_pci_assign_primary(); swiotlb_detect_4g(); +#if defined(CONFIG_FSL_PCI) && defined(CONFIG_ZONE_DMA32) + /* + * Inbound windows don't cover the full lower 4 GiB + * due to conflicts with PCICSRBAR and outbound windows, + * so limit the DMA32 zone to 2 GiB, to allow consistent + * allocations to succeed. + */ + limit_zone_pfn(ZONE_DMA32, 1UL << (31 - PAGE_SHIFT)); +#endif mpc85xx_smp_init(); } diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index bb75add67084..8162b0412117 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -24,7 +24,7 @@ static struct device_node *halt_node; -static struct of_device_id child_match[] = { +static const struct of_device_id child_match[] = { { .compatible = "sgy,gpio-halt", }, @@ -147,7 +147,7 @@ static int gpio_halt_remove(struct platform_device *pdev) return 0; } -static struct of_device_id gpio_halt_match[] = { +static const struct of_device_id gpio_halt_match[] = { /* We match on the gpio bus itself and scan the children since they * wont be matched against us. We know the bus wont match until it * has been registered too. */ diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c index c23f3443880a..bf17933b20f3 100644 --- a/arch/powerpc/platforms/86xx/gef_ppc9a.c +++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c @@ -213,7 +213,7 @@ static long __init mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, { .compatible = "fsl,mpc8641-pcie", }, diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c index 8a6ac20686ea..8facf5873866 100644 --- a/arch/powerpc/platforms/86xx/gef_sbc310.c +++ b/arch/powerpc/platforms/86xx/gef_sbc310.c @@ -200,7 +200,7 @@ static long __init mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, { .compatible = "fsl,mpc8641-pcie", }, diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c index 06c72636f299..8c9058df5642 100644 --- a/arch/powerpc/platforms/86xx/gef_sbc610.c +++ b/arch/powerpc/platforms/86xx/gef_sbc610.c @@ -190,7 +190,7 @@ static long __init mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, { .compatible = "fsl,mpc8641-pcie", }, diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c index d479d68fbb2b..55413a547ea8 100644 --- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c +++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c @@ -85,7 +85,7 @@ static void __init mpc8610_suspend_init(void) static inline void mpc8610_suspend_init(void) { } #endif /* CONFIG_SUSPEND */ -static struct of_device_id __initdata mpc8610_ids[] = { +static const struct of_device_id mpc8610_ids[] __initconst = { { .compatible = "fsl,mpc8610-immr", }, { .compatible = "fsl,mpc8610-guts", }, { .compatible = "simple-bus", }, diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index e8bf3fae5606..07ccb1b0cc7d 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -127,7 +127,7 @@ mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "fsl,srio", }, { .compatible = "gianfar", }, diff --git a/arch/powerpc/platforms/86xx/sbc8641d.c b/arch/powerpc/platforms/86xx/sbc8641d.c index b47a8fd0f3d3..6810b71d54a7 100644 --- a/arch/powerpc/platforms/86xx/sbc8641d.c +++ b/arch/powerpc/platforms/86xx/sbc8641d.c @@ -92,7 +92,7 @@ mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, { .compatible = "fsl,mpc8641-pcie", }, diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c index 82363e98f50e..61cae4c1edb8 100644 --- a/arch/powerpc/platforms/8xx/adder875.c +++ b/arch/powerpc/platforms/8xx/adder875.c @@ -92,7 +92,7 @@ static int __init adder875_probe(void) return of_flat_dt_is_compatible(root, "analogue-and-micro,adder875"); } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, {}, }; diff --git a/arch/powerpc/platforms/8xx/ep88xc.c b/arch/powerpc/platforms/8xx/ep88xc.c index e62166681d08..2bedeb7d5f8f 100644 --- a/arch/powerpc/platforms/8xx/ep88xc.c +++ b/arch/powerpc/platforms/8xx/ep88xc.c @@ -147,7 +147,7 @@ static int __init ep88xc_probe(void) return of_flat_dt_is_compatible(root, "fsl,ep88xc"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c index 63084640c5c5..78180c5e73ff 100644 --- a/arch/powerpc/platforms/8xx/mpc86xads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c @@ -122,7 +122,7 @@ static int __init mpc86xads_probe(void) return of_flat_dt_is_compatible(root, "fsl,mpc866ads"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c index 5921dcb498fd..4d62bf9dc789 100644 --- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c @@ -197,7 +197,7 @@ static int __init mpc885ads_probe(void) return of_flat_dt_is_compatible(root, "fsl,mpc885ads"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c index dda607807def..bee47a2b23e6 100644 --- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c +++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c @@ -124,7 +124,7 @@ static int __init tqm8xx_probe(void) return of_flat_dt_is_compatible(node, "tqc,tqm8xx"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 7d9ee3d8c618..76483e3acd60 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -116,6 +116,12 @@ config POWER6_CPU config POWER7_CPU bool "POWER7" depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER + +config POWER8_CPU + bool "POWER8" + depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER config E5500_CPU bool "Freescale e5500" diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 9978f594cac0..870b6dbd4d18 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -86,6 +86,7 @@ config SPU_FS_64K_LS config SPU_BASE bool default n + select PPC_COPRO_BASE config CBE_RAS bool "RAS features for bare metal Cell BE" diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index fe053e7c73ee..2d16884f67b9 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -20,7 +20,7 @@ spu-manage-$(CONFIG_PPC_CELL_COMMON) += spu_manage.o obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ spu_notify.o \ - spu_syscalls.o spu_fault.o \ + spu_syscalls.o \ $(spu-priv1-y) \ $(spu-manage-y) \ spufs/ diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c index 173568140a32..2b98a36ef8fb 100644 --- a/arch/powerpc/platforms/cell/celleb_pci.c +++ b/arch/powerpc/platforms/cell/celleb_pci.c @@ -454,7 +454,7 @@ static struct celleb_phb_spec celleb_fake_pci_spec __initdata = { .setup = celleb_setup_fake_pci, }; -static struct of_device_id celleb_phb_match[] __initdata = { +static const struct of_device_id celleb_phb_match[] __initconst = { { .name = "pci-pseudo", .data = &celleb_fake_pci_spec, diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c index 1d5a4d8ddad9..34e8ce2976aa 100644 --- a/arch/powerpc/platforms/cell/celleb_setup.c +++ b/arch/powerpc/platforms/cell/celleb_setup.c @@ -102,7 +102,7 @@ static void __init celleb_setup_arch_common(void) #endif } -static struct of_device_id celleb_bus_ids[] __initdata = { +static const struct of_device_id celleb_bus_ids[] __initconst = { { .type = "scc", }, { .type = "ioif", }, /* old style */ {}, diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 2930d1e81a05..ffcbd242e669 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -76,10 +76,6 @@ static LIST_HEAD(spu_full_list); static DEFINE_SPINLOCK(spu_full_list_lock); static DEFINE_MUTEX(spu_full_list_mutex); -struct spu_slb { - u64 esid, vsid; -}; - void spu_invalidate_slbs(struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; @@ -149,7 +145,7 @@ static void spu_restart_dma(struct spu *spu) } } -static inline void spu_load_slb(struct spu *spu, int slbe, struct spu_slb *slb) +static inline void spu_load_slb(struct spu *spu, int slbe, struct copro_slb *slb) { struct spu_priv2 __iomem *priv2 = spu->priv2; @@ -167,45 +163,12 @@ static inline void spu_load_slb(struct spu *spu, int slbe, struct spu_slb *slb) static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) { - struct mm_struct *mm = spu->mm; - struct spu_slb slb; - int psize; - - pr_debug("%s\n", __func__); - - slb.esid = (ea & ESID_MASK) | SLB_ESID_V; + struct copro_slb slb; + int ret; - switch(REGION_ID(ea)) { - case USER_REGION_ID: -#ifdef CONFIG_PPC_MM_SLICES - psize = get_slice_psize(mm, ea); -#else - psize = mm->context.user_psize; -#endif - slb.vsid = (get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M) - << SLB_VSID_SHIFT) | SLB_VSID_USER; - break; - case VMALLOC_REGION_ID: - if (ea < VMALLOC_END) - psize = mmu_vmalloc_psize; - else - psize = mmu_io_psize; - slb.vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) - << SLB_VSID_SHIFT) | SLB_VSID_KERNEL; - break; - case KERNEL_REGION_ID: - psize = mmu_linear_psize; - slb.vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) - << SLB_VSID_SHIFT) | SLB_VSID_KERNEL; - break; - default: - /* Future: support kernel segments so that drivers - * can use SPUs. - */ - pr_debug("invalid region access at %016lx\n", ea); - return 1; - } - slb.vsid |= mmu_psize_defs[psize].sllp; + ret = copro_calculate_slb(spu->mm, ea, &slb); + if (ret) + return ret; spu_load_slb(spu, spu->slb_replace, &slb); @@ -253,7 +216,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) return 0; } -static void __spu_kernel_slb(void *addr, struct spu_slb *slb) +static void __spu_kernel_slb(void *addr, struct copro_slb *slb) { unsigned long ea = (unsigned long)addr; u64 llp; @@ -272,7 +235,7 @@ static void __spu_kernel_slb(void *addr, struct spu_slb *slb) * Given an array of @nr_slbs SLB entries, @slbs, return non-zero if the * address @new_addr is present. */ -static inline int __slb_present(struct spu_slb *slbs, int nr_slbs, +static inline int __slb_present(struct copro_slb *slbs, int nr_slbs, void *new_addr) { unsigned long ea = (unsigned long)new_addr; @@ -297,7 +260,7 @@ static inline int __slb_present(struct spu_slb *slbs, int nr_slbs, void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa, void *code, int code_size) { - struct spu_slb slbs[4]; + struct copro_slb slbs[4]; int i, nr_slbs = 0; /* start and end addresses of both mappings */ void *addrs[] = { diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index 8cb6260cc80f..e45894a08118 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -138,7 +138,7 @@ int spufs_handle_class1(struct spu_context *ctx) if (ctx->state == SPU_STATE_RUNNABLE) ctx->spu->stats.hash_flt++; - /* we must not hold the lock when entering spu_handle_mm_fault */ + /* we must not hold the lock when entering copro_handle_mm_fault */ spu_release(ctx); access = (_PAGE_PRESENT | _PAGE_USER); @@ -149,7 +149,7 @@ int spufs_handle_class1(struct spu_context *ctx) /* hashing failed, so try the actual fault handler */ if (ret) - ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt); + ret = copro_handle_mm_fault(current->mm, ea, dsisr, &flt); /* * This is nasty: we need the state_mutex for all the bookkeeping even diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 7044fd36197b..5b77b1919fd2 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -258,7 +258,7 @@ static void chrp_init_early(void) struct device_node *node; const char *property; - if (strstr(cmd_line, "console=")) + if (strstr(boot_command_line, "console=")) return; /* find the boot console from /chosen/stdout */ if (!of_chosen) diff --git a/arch/powerpc/platforms/embedded6xx/gamecube.c b/arch/powerpc/platforms/embedded6xx/gamecube.c index a138e14bad2e..bd4ba5d7d568 100644 --- a/arch/powerpc/platforms/embedded6xx/gamecube.c +++ b/arch/powerpc/platforms/embedded6xx/gamecube.c @@ -90,7 +90,7 @@ define_machine(gamecube) { }; -static struct of_device_id gamecube_of_bus[] = { +static const struct of_device_id gamecube_of_bus[] = { { .compatible = "nintendo,flipper", }, { }, }; diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index 455e7c087422..168e1d80b2e5 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -21,7 +21,7 @@ #include "mpc10x.h" -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .type = "soc", }, { .compatible = "simple-bus", }, {}, diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c index 25e3bfb64efb..1613303177e6 100644 --- a/arch/powerpc/platforms/embedded6xx/mvme5100.c +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -149,7 +149,7 @@ static int __init mvme5100_add_bridge(struct device_node *dev) return 0; } -static struct of_device_id mvme5100_of_bus_ids[] __initdata = { +static const struct of_device_id mvme5100_of_bus_ids[] __initconst = { { .compatible = "hawk-bridge", }, {}, }; diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c index c458b60d14c4..d572833ebd00 100644 --- a/arch/powerpc/platforms/embedded6xx/storcenter.c +++ b/arch/powerpc/platforms/embedded6xx/storcenter.c @@ -24,7 +24,7 @@ #include "mpc10x.h" -static __initdata struct of_device_id storcenter_of_bus[] = { +static const struct of_device_id storcenter_of_bus[] __initconst = { { .name = "soc", }, {}, }; diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 6d8dadf19f0b..388e29bab8f6 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -235,7 +235,7 @@ define_machine(wii) { .machine_shutdown = wii_shutdown, }; -static struct of_device_id wii_of_bus[] = { +static const struct of_device_id wii_of_bus[] = { { .compatible = "nintendo,hollywood", }, { }, }; diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index 15adee544638..ada33358950d 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -290,7 +290,7 @@ static int gpio_mdio_remove(struct platform_device *dev) return 0; } -static struct of_device_id gpio_mdio_match[] = +static const struct of_device_id gpio_mdio_match[] = { { .compatible = "gpio-mdio", diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index 8c54de6d8ec4..d71b2c7e8403 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -393,7 +393,7 @@ static inline void pasemi_pcmcia_init(void) #endif -static struct of_device_id pasemi_bus_ids[] = { +static const struct of_device_id pasemi_bus_ids[] = { /* Unfortunately needed for legacy firmwares */ { .type = "localbus", }, { .type = "sdc", }, diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 141f8899a633..b127a29ac526 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -336,7 +336,7 @@ static void __init pmac_setup_arch(void) #endif #ifdef CONFIG_ADB - if (strstr(cmd_line, "adb_sync")) { + if (strstr(boot_command_line, "adb_sync")) { extern int __adb_probe_sync; __adb_probe_sync = 1; } @@ -460,7 +460,7 @@ pmac_halt(void) static void __init pmac_init_early(void) { /* Enable early btext debug if requested */ - if (strstr(cmd_line, "btextdbg")) { + if (strstr(boot_command_line, "btextdbg")) { udbg_adb_init_early(); register_early_udbg_console(); } @@ -469,8 +469,8 @@ static void __init pmac_init_early(void) pmac_feature_init(); /* Initialize debug stuff */ - udbg_scc_init(!!strstr(cmd_line, "sccdbg")); - udbg_adb_init(!!strstr(cmd_line, "btextdbg")); + udbg_scc_init(!!strstr(boot_command_line, "sccdbg")); + udbg_adb_init(!!strstr(boot_command_line, "btextdbg")); #ifdef CONFIG_PPC64 iommu_init_early_dart(); diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index c945bed4dc9e..426814a2ede3 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -66,6 +66,54 @@ static struct notifier_block ioda_eeh_nb = { }; #ifdef CONFIG_DEBUG_FS +static ssize_t ioda_eeh_ei_write(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct pci_controller *hose = filp->private_data; + struct pnv_phb *phb = hose->private_data; + struct eeh_dev *edev; + struct eeh_pe *pe; + int pe_no, type, func; + unsigned long addr, mask; + char buf[50]; + int ret; + + if (!phb->eeh_ops || !phb->eeh_ops->err_inject) + return -ENXIO; + + ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); + if (!ret) + return -EFAULT; + + /* Retrieve parameters */ + ret = sscanf(buf, "%x:%x:%x:%lx:%lx", + &pe_no, &type, &func, &addr, &mask); + if (ret != 5) + return -EINVAL; + + /* Retrieve PE */ + edev = kzalloc(sizeof(*edev), GFP_KERNEL); + if (!edev) + return -ENOMEM; + edev->phb = hose; + edev->pe_config_addr = pe_no; + pe = eeh_pe_get(edev); + kfree(edev); + if (!pe) + return -ENODEV; + + /* Do error injection */ + ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask); + return ret < 0 ? ret : count; +} + +static const struct file_operations ioda_eeh_ei_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = ioda_eeh_ei_write, +}; + static int ioda_eeh_dbgfs_set(void *data, int offset, u64 val) { struct pci_controller *hose = data; @@ -152,6 +200,10 @@ static int ioda_eeh_post_init(struct pci_controller *hose) if (!phb->has_dbgfs && phb->dbgfs) { phb->has_dbgfs = 1; + debugfs_create_file("err_injct", 0200, + phb->dbgfs, hose, + &ioda_eeh_ei_fops); + debugfs_create_file("err_injct_outbound", 0600, phb->dbgfs, hose, &ioda_eeh_outb_dbgfs_ops); @@ -189,6 +241,7 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option) { struct pci_controller *hose = pe->phb; struct pnv_phb *phb = hose->private_data; + bool freeze_pe = false; int enable, ret = 0; s64 rc; @@ -212,6 +265,10 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option) case EEH_OPT_THAW_DMA: enable = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA; break; + case EEH_OPT_FREEZE_PE: + freeze_pe = true; + enable = OPAL_EEH_ACTION_SET_FREEZE_ALL; + break; default: pr_warn("%s: Invalid option %d\n", __func__, option); @@ -219,17 +276,35 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option) } /* If PHB supports compound PE, to handle it */ - if (phb->unfreeze_pe) { - ret = phb->unfreeze_pe(phb, pe->addr, enable); + if (freeze_pe) { + if (phb->freeze_pe) { + phb->freeze_pe(phb, pe->addr); + } else { + rc = opal_pci_eeh_freeze_set(phb->opal_id, + pe->addr, + enable); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld freezing " + "PHB#%x-PE#%x\n", + __func__, rc, + phb->hose->global_number, pe->addr); + ret = -EIO; + } + } } else { - rc = opal_pci_eeh_freeze_clear(phb->opal_id, - pe->addr, - enable); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n", - __func__, rc, option, phb->hose->global_number, - pe->addr); - ret = -EIO; + if (phb->unfreeze_pe) { + ret = phb->unfreeze_pe(phb, pe->addr, enable); + } else { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + pe->addr, + enable); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld enable %d " + "for PHB#%x-PE#%x\n", + __func__, rc, option, + phb->hose->global_number, pe->addr); + ret = -EIO; + } } } @@ -439,11 +514,11 @@ int ioda_eeh_phb_reset(struct pci_controller *hose, int option) if (option == EEH_RESET_FUNDAMENTAL || option == EEH_RESET_HOT) rc = opal_pci_reset(phb->opal_id, - OPAL_PHB_COMPLETE, + OPAL_RESET_PHB_COMPLETE, OPAL_ASSERT_RESET); else if (option == EEH_RESET_DEACTIVATE) rc = opal_pci_reset(phb->opal_id, - OPAL_PHB_COMPLETE, + OPAL_RESET_PHB_COMPLETE, OPAL_DEASSERT_RESET); if (rc < 0) goto out; @@ -483,15 +558,15 @@ static int ioda_eeh_root_reset(struct pci_controller *hose, int option) */ if (option == EEH_RESET_FUNDAMENTAL) rc = opal_pci_reset(phb->opal_id, - OPAL_PCI_FUNDAMENTAL_RESET, + OPAL_RESET_PCI_FUNDAMENTAL, OPAL_ASSERT_RESET); else if (option == EEH_RESET_HOT) rc = opal_pci_reset(phb->opal_id, - OPAL_PCI_HOT_RESET, + OPAL_RESET_PCI_HOT, OPAL_ASSERT_RESET); else if (option == EEH_RESET_DEACTIVATE) rc = opal_pci_reset(phb->opal_id, - OPAL_PCI_HOT_RESET, + OPAL_RESET_PCI_HOT, OPAL_DEASSERT_RESET); if (rc < 0) goto out; @@ -607,6 +682,31 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) if (pe->type & EEH_PE_PHB) { ret = ioda_eeh_phb_reset(hose, option); } else { + struct pnv_phb *phb; + s64 rc; + + /* + * The frozen PE might be caused by PAPR error injection + * registers, which are expected to be cleared after hitting + * frozen PE as stated in the hardware spec. Unfortunately, + * that's not true on P7IOC. So we have to clear it manually + * to avoid recursive EEH errors during recovery. + */ + phb = hose->private_data; + if (phb->model == PNV_PHB_MODEL_P7IOC && + (option == EEH_RESET_HOT || + option == EEH_RESET_FUNDAMENTAL)) { + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PHB_ERROR, + OPAL_ASSERT_RESET); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clearing " + "error injection registers\n", + __func__, rc); + return -EIO; + } + } + bus = eeh_pe_bus_get(pe); if (pci_is_root_bus(bus) || pci_is_root_bus(bus->parent)) @@ -628,8 +728,8 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) * Retrieve error log, which contains log from device driver * and firmware. */ -int ioda_eeh_get_log(struct eeh_pe *pe, int severity, - char *drv_log, unsigned long len) +static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len) { pnv_pci_dump_phb_diag_data(pe->phb, pe->data); @@ -650,6 +750,49 @@ static int ioda_eeh_configure_bridge(struct eeh_pe *pe) return 0; } +static int ioda_eeh_err_inject(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + s64 ret; + + /* Sanity check on error type */ + if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR && + type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) { + pr_warn("%s: Invalid error type %d\n", + __func__, type); + return -ERANGE; + } + + if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR || + func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) { + pr_warn("%s: Invalid error function %d\n", + __func__, func); + return -ERANGE; + } + + /* Firmware supports error injection ? */ + if (!opal_check_token(OPAL_PCI_ERR_INJECT)) { + pr_warn("%s: Firmware doesn't support error injection\n", + __func__); + return -ENXIO; + } + + /* Do error injection */ + ret = opal_pci_err_inject(phb->opal_id, pe->addr, + type, func, addr, mask); + if (ret != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld injecting error " + "%d-%d to PHB#%x-PE#%x\n", + __func__, ret, type, func, + hose->global_number, pe->addr); + return -EIO; + } + + return 0; +} + static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data) { /* GEM */ @@ -743,14 +886,12 @@ static int ioda_eeh_get_pe(struct pci_controller *hose, * the master PE because slave PE is invisible * to EEH core. */ - if (phb->get_pe_state) { - pnv_pe = &phb->ioda.pe_array[pe_no]; - if (pnv_pe->flags & PNV_IODA_PE_SLAVE) { - pnv_pe = pnv_pe->master; - WARN_ON(!pnv_pe || - !(pnv_pe->flags & PNV_IODA_PE_MASTER)); - pe_no = pnv_pe->pe_number; - } + pnv_pe = &phb->ioda.pe_array[pe_no]; + if (pnv_pe->flags & PNV_IODA_PE_SLAVE) { + pnv_pe = pnv_pe->master; + WARN_ON(!pnv_pe || + !(pnv_pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pnv_pe->pe_number; } /* Find the PE according to PE# */ @@ -761,15 +902,37 @@ static int ioda_eeh_get_pe(struct pci_controller *hose, if (!dev_pe) return -EEXIST; - /* - * At this point, we're sure the compound PE should - * be put into frozen state. - */ + /* Freeze the (compound) PE */ *pe = dev_pe; - if (phb->freeze_pe && - !(dev_pe->state & EEH_PE_ISOLATED)) + if (!(dev_pe->state & EEH_PE_ISOLATED)) phb->freeze_pe(phb, pe_no); + /* + * At this point, we're sure the (compound) PE should + * have been frozen. However, we still need poke until + * hitting the frozen PE on top level. + */ + dev_pe = dev_pe->parent; + while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) { + int ret; + int active_flags = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE); + + ret = eeh_ops->get_state(dev_pe, NULL); + if (ret <= 0 || (ret & active_flags) == active_flags) { + dev_pe = dev_pe->parent; + continue; + } + + /* Frozen parent PE */ + *pe = dev_pe; + if (!(dev_pe->state & EEH_PE_ISOLATED)) + phb->freeze_pe(phb, dev_pe->addr); + + /* Next one */ + dev_pe = dev_pe->parent; + } + return 0; } @@ -971,5 +1134,6 @@ struct pnv_eeh_ops ioda_eeh_ops = { .reset = ioda_eeh_reset, .get_log = ioda_eeh_get_log, .configure_bridge = ioda_eeh_configure_bridge, + .err_inject = ioda_eeh_err_inject, .next_error = ioda_eeh_next_error }; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index fd7a16f855ed..3e89cbf55885 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -359,6 +359,31 @@ static int powernv_eeh_configure_bridge(struct eeh_pe *pe) } /** + * powernv_pe_err_inject - Inject specified error to the indicated PE + * @pe: the indicated PE + * @type: error type + * @func: specific error type + * @addr: address + * @mask: address mask + * + * The routine is called to inject specified error, which is + * determined by @type and @func, to the indicated PE for + * testing purpose. + */ +static int powernv_eeh_err_inject(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = -EEXIST; + + if (phb->eeh_ops && phb->eeh_ops->err_inject) + ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask); + + return ret; +} + +/** * powernv_eeh_next_error - Retrieve next EEH error to handle * @pe: Affected PE * @@ -414,6 +439,7 @@ static struct eeh_ops powernv_eeh_ops = { .wait_state = powernv_eeh_wait_state, .get_log = powernv_eeh_get_log, .configure_bridge = powernv_eeh_configure_bridge, + .err_inject = powernv_eeh_err_inject, .read_config = pnv_pci_cfg_read, .write_config = pnv_pci_cfg_write, .next_error = powernv_eeh_next_error, diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 85bb8fff7947..23260f7dfa7a 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -112,7 +112,7 @@ static ssize_t init_dump_show(struct dump_obj *dump_obj, struct dump_attribute *attr, char *buf) { - return sprintf(buf, "1 - initiate dump\n"); + return sprintf(buf, "1 - initiate Service Processor(FSP) dump\n"); } static int64_t dump_fips_init(uint8_t type) @@ -121,7 +121,7 @@ static int64_t dump_fips_init(uint8_t type) rc = opal_dump_init(type); if (rc) - pr_warn("%s: Failed to initiate FipS dump (%d)\n", + pr_warn("%s: Failed to initiate FSP dump (%d)\n", __func__, rc); return rc; } @@ -131,8 +131,12 @@ static ssize_t init_dump_store(struct dump_obj *dump_obj, const char *buf, size_t count) { - dump_fips_init(DUMP_TYPE_FSP); - pr_info("%s: Initiated FSP dump\n", __func__); + int rc; + + rc = dump_fips_init(DUMP_TYPE_FSP); + if (rc == OPAL_SUCCESS) + pr_info("%s: Initiated FSP dump\n", __func__); + return count; } @@ -297,7 +301,7 @@ static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj, * and rely on userspace to ask us to try * again. */ - pr_info("%s: Platform dump partially read.ID = 0x%x\n", + pr_info("%s: Platform dump partially read. ID = 0x%x\n", __func__, dump->id); return -EIO; } @@ -423,6 +427,10 @@ void __init opal_platform_dump_init(void) { int rc; + /* ELOG not supported by firmware */ + if (!opal_check_token(OPAL_DUMP_READ)) + return; + dump_kset = kset_create_and_add("dump", NULL, opal_kobj); if (!dump_kset) { pr_warn("%s: Failed to create dump kset\n", __func__); diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index bbdb3ffaab98..518fe95dbf24 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -295,6 +295,10 @@ int __init opal_elog_init(void) { int rc = 0; + /* ELOG not supported by firmware */ + if (!opal_check_token(OPAL_ELOG_READ)) + return -1; + elog_kset = kset_create_and_add("elog", NULL, opal_kobj); if (!elog_kset) { pr_warn("%s: failed to create elog kset\n", __func__); diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index ad4b31df779a..dd2c285ad170 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -191,6 +191,7 @@ static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf, { struct lpc_debugfs_entry *lpc = filp->private_data; u32 data, pos, len, todo; + __be32 bedata; int rc; if (!access_ok(VERIFY_WRITE, ubuf, count)) @@ -213,9 +214,10 @@ static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf, len = 2; } rc = opal_lpc_read(opal_lpc_chip_id, lpc->lpc_type, pos, - &data, len); + &bedata, len); if (rc) return -ENXIO; + data = be32_to_cpu(bedata); switch(len) { case 4: rc = __put_user((u32)data, (u32 __user *)ubuf); diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c index acd9f7e96678..f9896fd5d04a 100644 --- a/arch/powerpc/platforms/powernv/opal-nvram.c +++ b/arch/powerpc/platforms/powernv/opal-nvram.c @@ -78,7 +78,7 @@ void __init opal_nvram_init(void) } nvram_size = be32_to_cpup(nbytes_p); - printk(KERN_INFO "OPAL nvram setup, %u bytes\n", nvram_size); + pr_info("OPAL nvram setup, %u bytes\n", nvram_size); of_node_put(np); ppc_md.nvram_read = opal_nvram_read; diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c index b1885db8fdf3..499707ddaa9c 100644 --- a/arch/powerpc/platforms/powernv/opal-rtc.c +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -42,6 +42,9 @@ unsigned long __init opal_get_boot_time(void) __be64 __h_m_s_ms; long rc = OPAL_BUSY; + if (!opal_check_token(OPAL_RTC_READ)) + goto out; + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); if (rc == OPAL_BUSY_EVENT) @@ -49,16 +52,18 @@ unsigned long __init opal_get_boot_time(void) else mdelay(10); } - if (rc != OPAL_SUCCESS) { - ppc_md.get_rtc_time = NULL; - ppc_md.set_rtc_time = NULL; - return 0; - } + if (rc != OPAL_SUCCESS) + goto out; + y_m_d = be32_to_cpu(__y_m_d); h_m_s_ms = be64_to_cpu(__h_m_s_ms); opal_to_tm(y_m_d, h_m_s_ms, &tm); return mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); +out: + ppc_md.get_rtc_time = NULL; + ppc_md.set_rtc_time = NULL; + return 0; } void opal_get_rtc_time(struct rtc_time *tm) diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index d8a000a9988b..ae14c40b4b1c 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -2,7 +2,7 @@ #include <linux/jump_label.h> #include <asm/trace.h> -#ifdef CONFIG_JUMP_LABEL +#ifdef HAVE_JUMP_LABEL struct static_key opal_tracepoint_key = STATIC_KEY_INIT; void opal_tracepoint_regfunc(void) diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 2e6ce1b8dc8f..e9e2450c1fdd 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -184,6 +184,7 @@ OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER) OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS); OPAL_CALL(opal_pci_eeh_freeze_clear, OPAL_PCI_EEH_FREEZE_CLEAR); OPAL_CALL(opal_pci_eeh_freeze_set, OPAL_PCI_EEH_FREEZE_SET); +OPAL_CALL(opal_pci_err_inject, OPAL_PCI_ERR_INJECT); OPAL_CALL(opal_pci_shpc, OPAL_PCI_SHPC); OPAL_CALL(opal_pci_phb_mmio_enable, OPAL_PCI_PHB_MMIO_ENABLE); OPAL_CALL(opal_pci_set_phb_mem_window, OPAL_PCI_SET_PHB_MEM_WINDOW); @@ -232,6 +233,7 @@ OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE); OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE); OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE); OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE); +OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN); OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT); OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO); OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2); @@ -247,3 +249,4 @@ OPAL_CALL(opal_set_param, OPAL_SET_PARAM); OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); +OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 4b005ae5dc4b..b642b0562f5a 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -105,12 +105,12 @@ int __init early_init_dt_scan_opal(unsigned long node, if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) { powerpc_firmware_features |= FW_FEATURE_OPALv2; powerpc_firmware_features |= FW_FEATURE_OPALv3; - printk("OPAL V3 detected !\n"); + pr_info("OPAL V3 detected !\n"); } else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) { powerpc_firmware_features |= FW_FEATURE_OPALv2; - printk("OPAL V2 detected !\n"); + pr_info("OPAL V2 detected !\n"); } else { - printk("OPAL V1 detected !\n"); + pr_info("OPAL V1 detected !\n"); } /* Reinit all cores with the right endian */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index df241b11d4f7..468a0f23c7f2 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -37,41 +37,43 @@ #include <asm/xics.h> #include <asm/debug.h> #include <asm/firmware.h> +#include <asm/pnv-pci.h> + +#include <misc/cxl.h> #include "powernv.h" #include "pci.h" -#define define_pe_printk_level(func, kern_level) \ -static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \ -{ \ - struct va_format vaf; \ - va_list args; \ - char pfix[32]; \ - int r; \ - \ - va_start(args, fmt); \ - \ - vaf.fmt = fmt; \ - vaf.va = &args; \ - \ - if (pe->pdev) \ - strlcpy(pfix, dev_name(&pe->pdev->dev), \ - sizeof(pfix)); \ - else \ - sprintf(pfix, "%04x:%02x ", \ - pci_domain_nr(pe->pbus), \ - pe->pbus->number); \ - r = printk(kern_level "pci %s: [PE# %.3d] %pV", \ - pfix, pe->pe_number, &vaf); \ - \ - va_end(args); \ - \ - return r; \ -} \ - -define_pe_printk_level(pe_err, KERN_ERR); -define_pe_printk_level(pe_warn, KERN_WARNING); -define_pe_printk_level(pe_info, KERN_INFO); +static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + char pfix[32]; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + if (pe->pdev) + strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix)); + else + sprintf(pfix, "%04x:%02x ", + pci_domain_nr(pe->pbus), pe->pbus->number); + + printk("%spci %s: [PE# %.3d] %pV", + level, pfix, pe->pe_number, &vaf); + + va_end(args); +} + +#define pe_err(pe, fmt, ...) \ + pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__) +#define pe_warn(pe, fmt, ...) \ + pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__) +#define pe_info(pe, fmt, ...) \ + pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__) /* * stdcix is only supposed to be used in hypervisor real mode as per @@ -385,7 +387,7 @@ static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no) } } -int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt) +static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt) { struct pnv_ioda_pe *pe, *slave; s64 rc; @@ -890,6 +892,28 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, return 0; } +static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb, + struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *pe; + u64 end, mask; + + if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) + return 0; + + pe = &phb->ioda.pe_array[pdn->pe_number]; + if (!pe->tce_bypass_enabled) + return __dma_get_required_mask(&pdev->dev); + + + end = pe->tce_bypass_base + memblock_end_of_DRAM(); + mask = 1ULL << (fls64(end) - 1); + mask += mask - 1; + + return mask; +} + static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus, bool add_to_iommu_group) @@ -1306,14 +1330,186 @@ static void pnv_ioda2_msi_eoi(struct irq_data *d) icp_native_eoi(d); } + +static void set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) +{ + struct irq_data *idata; + struct irq_chip *ichip; + + if (phb->type != PNV_PHB_IODA2) + return; + + if (!phb->ioda.irq_chip_init) { + /* + * First time we setup an MSI IRQ, we need to setup the + * corresponding IRQ chip to route correctly. + */ + idata = irq_get_irq_data(virq); + ichip = irq_data_get_irq_chip(idata); + phb->ioda.irq_chip_init = 1; + phb->ioda.irq_chip = *ichip; + phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; + } + irq_set_chip(virq, &phb->ioda.irq_chip); +} + +#ifdef CONFIG_CXL_BASE + +struct device_node *pnv_pci_to_phb_node(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + + return hose->dn; +} +EXPORT_SYMBOL(pnv_pci_to_phb_node); + +int pnv_phb_to_cxl(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pe; + int rc; + + pe = pnv_ioda_get_pe(dev); + if (!pe) + return -ENODEV; + + pe_info(pe, "Switching PHB to CXL\n"); + + rc = opal_pci_set_phb_cxl_mode(phb->opal_id, 1, pe->pe_number); + if (rc) + dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc); + + return rc; +} +EXPORT_SYMBOL(pnv_phb_to_cxl); + +/* Find PHB for cxl dev and allocate MSI hwirqs? + * Returns the absolute hardware IRQ number + */ +int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, num); + + if (hwirq < 0) { + dev_warn(&dev->dev, "Failed to find a free MSI\n"); + return -ENOSPC; + } + + return phb->msi_base + hwirq; +} +EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs); + +void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, num); +} +EXPORT_SYMBOL(pnv_cxl_release_hwirqs); + +void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int i, hwirq; + + for (i = 1; i < CXL_IRQ_RANGES; i++) { + if (!irqs->range[i]) + continue; + pr_devel("cxl release irq range 0x%x: offset: 0x%lx limit: %ld\n", + i, irqs->offset[i], + irqs->range[i]); + hwirq = irqs->offset[i] - phb->msi_base; + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, + irqs->range[i]); + } +} +EXPORT_SYMBOL(pnv_cxl_release_hwirq_ranges); + +int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int i, hwirq, try; + + memset(irqs, 0, sizeof(struct cxl_irq_ranges)); + + /* 0 is reserved for the multiplexed PSL DSI interrupt */ + for (i = 1; i < CXL_IRQ_RANGES && num; i++) { + try = num; + while (try) { + hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, try); + if (hwirq >= 0) + break; + try /= 2; + } + if (!try) + goto fail; + + irqs->offset[i] = phb->msi_base + hwirq; + irqs->range[i] = try; + pr_devel("cxl alloc irq range 0x%x: offset: 0x%lx limit: %li\n", + i, irqs->offset[i], irqs->range[i]); + num -= try; + } + if (num) + goto fail; + + return 0; +fail: + pnv_cxl_release_hwirq_ranges(irqs, dev); + return -ENOSPC; +} +EXPORT_SYMBOL(pnv_cxl_alloc_hwirq_ranges); + +int pnv_cxl_get_irq_count(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + return phb->msi_bmp.irq_count; +} +EXPORT_SYMBOL(pnv_cxl_get_irq_count); + +int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, + unsigned int virq) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + unsigned int xive_num = hwirq - phb->msi_base; + struct pnv_ioda_pe *pe; + int rc; + + if (!(pe = pnv_ioda_get_pe(dev))) + return -ENODEV; + + /* Assign XIVE to PE */ + rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); + if (rc) { + pe_warn(pe, "%s: OPAL error %d setting msi_base 0x%x " + "hwirq 0x%x XIVE 0x%x PE\n", + pci_name(dev), rc, phb->msi_base, hwirq, xive_num); + return -EIO; + } + set_msi_irq_chip(phb, virq); + + return 0; +} +EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup); +#endif + static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, unsigned int hwirq, unsigned int virq, unsigned int is_64, struct msi_msg *msg) { struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); struct pci_dn *pdn = pci_get_pdn(dev); - struct irq_data *idata; - struct irq_chip *ichip; unsigned int xive_num = hwirq - phb->msi_base; __be32 data; int rc; @@ -1365,22 +1561,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, } msg->data = be32_to_cpu(data); - /* - * Change the IRQ chip for the MSI interrupts on PHB3. - * The corresponding IRQ chip should be populated for - * the first time. - */ - if (phb->type == PNV_PHB_IODA2) { - if (!phb->ioda.irq_chip_init) { - idata = irq_get_irq_data(virq); - ichip = irq_data_get_irq_chip(idata); - phb->ioda.irq_chip_init = 1; - phb->ioda.irq_chip = *ichip; - phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; - } - - irq_set_chip(virq, &phb->ioda.irq_chip); - } + set_msi_irq_chip(phb, virq); pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," " address=%x_%08x data=%x PE# %d\n", @@ -1627,12 +1808,12 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) { - opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET, + opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET); } -void __init pnv_pci_init_ioda_phb(struct device_node *np, - u64 hub_id, int ioda_type) +static void __init pnv_pci_init_ioda_phb(struct device_node *np, + u64 hub_id, int ioda_type) { struct pci_controller *hose; struct pnv_phb *phb; @@ -1782,6 +1963,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Setup TCEs */ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; phb->dma_set_mask = pnv_pci_ioda_dma_set_mask; + phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask; /* Setup shutdown function for kexec */ phb->shutdown = pnv_pci_ioda_shutdown; @@ -1803,7 +1985,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, pci_add_flags(PCI_REASSIGN_ALL_RSRC); /* Reset IODA tables to a clean state */ - rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET); + rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET); if (rc) pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index b45c49249a5d..b3ca77ddf36d 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -753,6 +753,17 @@ int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) return __dma_set_mask(&pdev->dev, dma_mask); } +u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + + if (phb && phb->dma_get_required_mask) + return phb->dma_get_required_mask(phb, pdev); + + return __dma_get_required_mask(&pdev->dev); +} + void pnv_pci_shutdown(void) { struct pci_controller *hose; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 48494d4b6058..34d29eb2a4de 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -85,6 +85,8 @@ struct pnv_eeh_ops { int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len); int (*configure_bridge)(struct eeh_pe *pe); + int (*err_inject)(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask); int (*next_error)(struct eeh_pe **pe); }; #endif /* CONFIG_EEH */ @@ -122,6 +124,8 @@ struct pnv_phb { void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev, u64 dma_mask); + u64 (*dma_get_required_mask)(struct pnv_phb *phb, + struct pci_dev *pdev); void (*fixup_phb)(struct pci_controller *hose); u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); void (*shutdown)(struct pnv_phb *phb); diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 75501bfede7f..6c8e2d188cd0 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -13,6 +13,7 @@ struct pci_dev; extern void pnv_pci_init(void); extern void pnv_pci_shutdown(void); extern int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask); +extern u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev); #else static inline void pnv_pci_init(void) { } static inline void pnv_pci_shutdown(void) { } @@ -21,6 +22,11 @@ static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) { return -ENODEV; } + +static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) +{ + return 0; +} #endif extern void pnv_lpc_init(void); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 5a0e2dc6de5f..3f9546d8a51f 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -173,6 +173,14 @@ static int pnv_dma_set_mask(struct device *dev, u64 dma_mask) return __dma_set_mask(dev, dma_mask); } +static u64 pnv_dma_get_required_mask(struct device *dev) +{ + if (dev_is_pci(dev)) + return pnv_pci_dma_get_required_mask(to_pci_dev(dev)); + + return __dma_get_required_mask(dev); +} + static void pnv_shutdown(void) { /* Let the PCI code clear up IODA tables */ @@ -307,7 +315,7 @@ static int __init pnv_probe(void) * Returns the cpu frequency for 'cpu' in Hz. This is used by * /proc/cpuinfo */ -unsigned long pnv_get_proc_freq(unsigned int cpu) +static unsigned long pnv_get_proc_freq(unsigned int cpu) { unsigned long ret_freq; @@ -335,6 +343,7 @@ define_machine(powernv) { .power_save = power7_idle, .calibrate_decr = generic_calibrate_decr, .dma_set_mask = pnv_dma_set_mask, + .dma_get_required_mask = pnv_dma_get_required_mask, #ifdef CONFIG_KEXEC .kexec_cpu_down = pnv_kexec_cpu_down, #endif diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 5fcfcf44e3a9..4753958cd509 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -54,7 +54,7 @@ static void pnv_smp_setup_cpu(int cpu) #endif } -int pnv_smp_kick_cpu(int nr) +static int pnv_smp_kick_cpu(int nr) { unsigned int pcpu = get_hard_smp_processor_id(nr); unsigned long start_here = @@ -168,9 +168,9 @@ static void pnv_smp_cpu_kill_self(void) power7_nap(1); ppc64_runlatch_on(); - /* Reenable IRQs briefly to clear the IPI that woke us */ - local_irq_enable(); - local_irq_disable(); + /* Clear the IPI that woke us up */ + icp_native_flush_interrupt(); + local_paca->irq_happened &= PACA_IRQ_HARD_DIS; mb(); if (cpu_core_split_required()) diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 894ecb3eb596..c87f96b79d1a 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -24,6 +24,7 @@ #include <asm/smp.h> #include "subcore.h" +#include "powernv.h" /* diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 2d8bf15879fd..fc44ad0475f8 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -555,7 +555,6 @@ static int cmm_mem_going_offline(void *arg) pa_last = pa_last->next; free_page((unsigned long)cmm_page_list); cmm_page_list = pa_last; - continue; } } pa_curr = pa_curr->next; diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index a2450b8a50a5..fdf01b660d59 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -17,6 +17,7 @@ #include <linux/slab.h> #include <linux/of.h> #include "offline_states.h" +#include "pseries.h" #include <asm/prom.h> #include <asm/machdep.h> @@ -363,7 +364,8 @@ static int dlpar_online_cpu(struct device_node *dn) int rc = 0; unsigned int cpu; int len, nthreads, i; - const u32 *intserv; + const __be32 *intserv; + u32 thread; intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); if (!intserv) @@ -373,8 +375,9 @@ static int dlpar_online_cpu(struct device_node *dn) cpu_maps_update_begin(); for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); for_each_present_cpu(cpu) { - if (get_hard_smp_processor_id(cpu) != intserv[i]) + if (get_hard_smp_processor_id(cpu) != thread) continue; BUG_ON(get_cpu_current_state(cpu) != CPU_STATE_OFFLINE); @@ -388,7 +391,7 @@ static int dlpar_online_cpu(struct device_node *dn) } if (cpu == num_possible_cpus()) printk(KERN_WARNING "Could not find cpu to online " - "with physical id 0x%x\n", intserv[i]); + "with physical id 0x%x\n", thread); } cpu_maps_update_done(); @@ -442,7 +445,8 @@ static int dlpar_offline_cpu(struct device_node *dn) int rc = 0; unsigned int cpu; int len, nthreads, i; - const u32 *intserv; + const __be32 *intserv; + u32 thread; intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); if (!intserv) @@ -452,8 +456,9 @@ static int dlpar_offline_cpu(struct device_node *dn) cpu_maps_update_begin(); for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); for_each_present_cpu(cpu) { - if (get_hard_smp_processor_id(cpu) != intserv[i]) + if (get_hard_smp_processor_id(cpu) != thread) continue; if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE) @@ -475,14 +480,14 @@ static int dlpar_offline_cpu(struct device_node *dn) * Upgrade it's state to CPU_STATE_OFFLINE. */ set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); - BUG_ON(plpar_hcall_norets(H_PROD, intserv[i]) + BUG_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS); __cpu_die(cpu); break; } if (cpu == num_possible_cpus()) printk(KERN_WARNING "Could not find cpu to offline " - "with physical id 0x%x\n", intserv[i]); + "with physical id 0x%x\n", thread); } cpu_maps_update_done(); @@ -494,15 +499,15 @@ out: static ssize_t dlpar_cpu_release(const char *buf, size_t count) { struct device_node *dn; - const u32 *drc_index; + u32 drc_index; int rc; dn = of_find_node_by_path(buf); if (!dn) return -EINVAL; - drc_index = of_get_property(dn, "ibm,my-drc-index", NULL); - if (!drc_index) { + rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index); + if (rc) { of_node_put(dn); return -EINVAL; } @@ -513,7 +518,7 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count) return -EINVAL; } - rc = dlpar_release_drc(*drc_index); + rc = dlpar_release_drc(drc_index); if (rc) { of_node_put(dn); return rc; @@ -521,7 +526,7 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count) rc = dlpar_detach_node(dn); if (rc) { - dlpar_acquire_drc(*drc_index); + dlpar_acquire_drc(drc_index); return rc; } diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index b08053819d99..a6c7e19f5eb3 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -88,29 +88,14 @@ static int pseries_eeh_init(void) * and its variant since the old firmware probably support address * of domain/bus/slot/function for EEH RTAS operations. */ - if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) { - pr_warn("%s: RTAS service <ibm,set-eeh-option> invalid\n", - __func__); - return -EINVAL; - } else if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE) { - pr_warn("%s: RTAS service <ibm,set-slot-reset> invalid\n", - __func__); - return -EINVAL; - } else if (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && - ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) { - pr_warn("%s: RTAS service <ibm,read-slot-reset-state2> and " - "<ibm,read-slot-reset-state> invalid\n", - __func__); - return -EINVAL; - } else if (ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE) { - pr_warn("%s: RTAS service <ibm,slot-error-detail> invalid\n", - __func__); - return -EINVAL; - } else if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE && - ibm_configure_bridge == RTAS_UNKNOWN_SERVICE) { - pr_warn("%s: RTAS service <ibm,configure-pe> and " - "<ibm,configure-bridge> invalid\n", - __func__); + if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE || + ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE || + (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && + ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) || + ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE || + (ibm_configure_pe == RTAS_UNKNOWN_SERVICE && + ibm_configure_bridge == RTAS_UNKNOWN_SERVICE)) { + pr_info("EEH functionality not supported\n"); return -EINVAL; } @@ -118,11 +103,11 @@ static int pseries_eeh_init(void) spin_lock_init(&slot_errbuf_lock); eeh_error_buf_size = rtas_token("rtas-error-log-max"); if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { - pr_warn("%s: unknown EEH error log size\n", + pr_info("%s: unknown EEH error log size\n", __func__); eeh_error_buf_size = 1024; } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { - pr_warn("%s: EEH error log size %d exceeds the maximal %d\n", + pr_info("%s: EEH error log size %d exceeds the maximal %d\n", __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); eeh_error_buf_size = RTAS_ERROR_LOG_MAX; } @@ -349,7 +334,9 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) if (pe->addr) config_addr = pe->addr; break; - + case EEH_OPT_FREEZE_PE: + /* Not support */ + return 0; default: pr_err("%s: Invalid option %d\n", __func__, option); @@ -729,6 +716,7 @@ static struct eeh_ops pseries_eeh_ops = { .wait_state = pseries_eeh_wait_state, .get_log = pseries_eeh_get_log, .configure_bridge = pseries_eeh_configure_bridge, + .err_inject = NULL, .read_config = pseries_eeh_read_config, .write_config = pseries_eeh_write_config, .next_error = NULL, diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 20d62975856f..b174fa751d26 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -90,7 +90,7 @@ static void rtas_stop_self(void) { static struct rtas_args args = { .nargs = 0, - .nret = 1, + .nret = cpu_to_be32(1), .rets = &args.args[0], }; @@ -312,7 +312,8 @@ static void pseries_remove_processor(struct device_node *np) { unsigned int cpu; int len, nthreads, i; - const u32 *intserv; + const __be32 *intserv; + u32 thread; intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); if (!intserv) @@ -322,8 +323,9 @@ static void pseries_remove_processor(struct device_node *np) cpu_maps_update_begin(); for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); for_each_present_cpu(cpu) { - if (get_hard_smp_processor_id(cpu) != intserv[i]) + if (get_hard_smp_processor_id(cpu) != thread) continue; BUG_ON(cpu_online(cpu)); set_cpu_present(cpu, false); @@ -332,7 +334,7 @@ static void pseries_remove_processor(struct device_node *np) } if (cpu >= nr_cpu_ids) printk(KERN_WARNING "Could not find cpu to remove " - "with physical id 0x%x\n", intserv[i]); + "with physical id 0x%x\n", thread); } cpu_maps_update_done(); } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 34064f50945e..3c4c0dcd90d3 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -20,6 +20,7 @@ #include <asm/machdep.h> #include <asm/prom.h> #include <asm/sparsemem.h> +#include "pseries.h" unsigned long pseries_memory_block_size(void) { diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 4642d6a4d356..de1ec54a2a57 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -329,16 +329,16 @@ struct direct_window { /* Dynamic DMA Window support */ struct ddw_query_response { - __be32 windows_available; - __be32 largest_available_block; - __be32 page_size; - __be32 migration_capable; + u32 windows_available; + u32 largest_available_block; + u32 page_size; + u32 migration_capable; }; struct ddw_create_response { - __be32 liobn; - __be32 addr_hi; - __be32 addr_lo; + u32 liobn; + u32 addr_hi; + u32 addr_lo; }; static LIST_HEAD(direct_window_list); @@ -725,16 +725,18 @@ static void remove_ddw(struct device_node *np, bool remove_prop) { struct dynamic_dma_window_prop *dwp; struct property *win64; - const u32 *ddw_avail; + u32 ddw_avail[3]; u64 liobn; - int len, ret = 0; + int ret = 0; + + ret = of_property_read_u32_array(np, "ibm,ddw-applicable", + &ddw_avail[0], 3); - ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len); win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); if (!win64) return; - if (!ddw_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp)) + if (ret || win64->length < sizeof(*dwp)) goto delprop; dwp = win64->value; @@ -872,8 +874,9 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, do { /* extra outputs are LIOBN and dma-addr (hi, lo) */ - ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, cfg_addr, - BUID_HI(buid), BUID_LO(buid), page_shift, window_shift); + ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, + cfg_addr, BUID_HI(buid), BUID_LO(buid), + page_shift, window_shift); } while (rtas_busy_delay(ret)); dev_info(&dev->dev, "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " @@ -910,7 +913,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) int page_shift; u64 dma_addr, max_addr; struct device_node *dn; - const u32 *uninitialized_var(ddw_avail); + u32 ddw_avail[3]; struct direct_window *window; struct property *win64; struct dynamic_dma_window_prop *ddwprop; @@ -942,8 +945,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) * for the given node in that order. * the property is actually in the parent, not the PE */ - ddw_avail = of_get_property(pdn, "ibm,ddw-applicable", &len); - if (!ddw_avail || len < 3 * sizeof(u32)) + ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", + &ddw_avail[0], 3); + if (ret) goto out_failed; /* @@ -966,11 +970,11 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) dev_dbg(&dev->dev, "no free dynamic windows"); goto out_failed; } - if (be32_to_cpu(query.page_size) & 4) { + if (query.page_size & 4) { page_shift = 24; /* 16MB */ - } else if (be32_to_cpu(query.page_size) & 2) { + } else if (query.page_size & 2) { page_shift = 16; /* 64kB */ - } else if (be32_to_cpu(query.page_size) & 1) { + } else if (query.page_size & 1) { page_shift = 12; /* 4kB */ } else { dev_dbg(&dev->dev, "no supported direct page size in mask %x", @@ -980,7 +984,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) /* verify the window * number of ptes will map the partition */ /* check largest block * page size > max memory hotplug addr */ max_addr = memory_hotplug_max(); - if (be32_to_cpu(query.largest_available_block) < (max_addr >> page_shift)) { + if (query.largest_available_block < (max_addr >> page_shift)) { dev_dbg(&dev->dev, "can't map partiton max 0x%llx with %u " "%llu-sized pages\n", max_addr, query.largest_available_block, 1ULL << page_shift); @@ -1006,8 +1010,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (ret != 0) goto out_free_prop; - ddwprop->liobn = create.liobn; - ddwprop->dma_base = cpu_to_be64(of_read_number(&create.addr_hi, 2)); + ddwprop->liobn = cpu_to_be32(create.liobn); + ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) | + create.addr_lo); ddwprop->tce_shift = cpu_to_be32(page_shift); ddwprop->window_shift = cpu_to_be32(len); @@ -1039,7 +1044,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) list_add(&window->list, &direct_window_list); spin_unlock(&direct_window_list_lock); - dma_addr = of_read_number(&create.addr_hi, 2); + dma_addr = be64_to_cpu(ddwprop->dma_base); goto out_unlock; out_free_window: diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 34e64237fff9..8c509d5397c6 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -59,8 +59,6 @@ EXPORT_SYMBOL(plpar_hcall); EXPORT_SYMBOL(plpar_hcall9); EXPORT_SYMBOL(plpar_hcall_norets); -extern void pSeries_find_serial_port(void); - void vpa_init(int cpu) { int hwcpu = get_hard_smp_processor_id(cpu); @@ -642,7 +640,7 @@ EXPORT_SYMBOL(arch_free_page); #endif #ifdef CONFIG_TRACEPOINTS -#ifdef CONFIG_JUMP_LABEL +#ifdef HAVE_JUMP_LABEL struct static_key hcall_tracepoint_key = STATIC_KEY_INIT; void hcall_tracepoint_regfunc(void) diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 0cc240b7f694..11a3b617ef5d 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -276,8 +276,10 @@ static ssize_t pSeries_nvram_get_size(void) * sequence #: The unique sequence # for each event. (until it wraps) * error log: The error log from event_scan */ -int nvram_write_os_partition(struct nvram_os_partition *part, char * buff, - int length, unsigned int err_type, unsigned int error_log_cnt) +static int nvram_write_os_partition(struct nvram_os_partition *part, + char *buff, int length, + unsigned int err_type, + unsigned int error_log_cnt) { int rc; loff_t tmp_index; @@ -330,9 +332,9 @@ int nvram_write_error_log(char * buff, int length, * * Reads nvram partition for at most 'length' */ -int nvram_read_partition(struct nvram_os_partition *part, char *buff, - int length, unsigned int *err_type, - unsigned int *error_log_cnt) +static int nvram_read_partition(struct nvram_os_partition *part, char *buff, + int length, unsigned int *err_type, + unsigned int *error_log_cnt) { int rc; loff_t tmp_index; diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index c413ec158ff5..67e48594040c 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -29,6 +29,7 @@ #include <asm/pci-bridge.h> #include <asm/prom.h> #include <asm/ppc-pci.h> +#include "pseries.h" #if 0 void pcibios_name_device(struct pci_dev *dev) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index dff05b9eb946..5a4d0fc03b03 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -126,7 +126,7 @@ struct epow_errorlog { #define EPOW_MAIN_ENCLOSURE 5 #define EPOW_POWER_OFF 7 -void rtas_parse_epow_errlog(struct rtas_error_log *log) +static void rtas_parse_epow_errlog(struct rtas_error_log *log) { struct pseries_errorlog *pseries_log; struct epow_errorlog *epow_log; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index e724d3186e73..125c589eeef5 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -561,7 +561,7 @@ void pSeries_coalesce_init(void) * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions, * handle that here. (Stolen from parse_system_parameter_string) */ -void pSeries_cmo_feature_init(void) +static void pSeries_cmo_feature_init(void) { char *ptr, *key, *value, *end; int call_status; diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 47b6b9f81d43..ad56edc39919 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -314,7 +314,7 @@ axon_ram_remove(struct platform_device *device) return 0; } -static struct of_device_id axon_ram_device_id[] = { +static const struct of_device_id axon_ram_device_id[] = { { .type = "dma-memory" }, diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c index e9056e438575..2d8a101b6b9e 100644 --- a/arch/powerpc/sysdev/dcr.c +++ b/arch/powerpc/sysdev/dcr.c @@ -230,5 +230,6 @@ EXPORT_SYMBOL_GPL(dcr_unmap_mmio); #ifdef CONFIG_PPC_DCR_NATIVE DEFINE_SPINLOCK(dcr_ind_lock); +EXPORT_SYMBOL_GPL(dcr_ind_lock); #endif /* defined(CONFIG_PPC_DCR_NATIVE) */ diff --git a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c index afc2dbf37011..90545ad1626e 100644 --- a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c +++ b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c @@ -171,7 +171,7 @@ static int mpc85xx_l2ctlr_of_remove(struct platform_device *dev) return 0; } -static struct of_device_id mpc85xx_l2ctlr_of_match[] = { +static const struct of_device_id mpc85xx_l2ctlr_of_match[] = { { .compatible = "fsl,p2020-l2-cache-controller", }, diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index b32e79dbef4f..de40b48b460e 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -18,6 +18,8 @@ #include <linux/pci.h> #include <linux/slab.h> #include <linux/of_platform.h> +#include <linux/interrupt.h> +#include <linux/seq_file.h> #include <sysdev/fsl_soc.h> #include <asm/prom.h> #include <asm/hw_irq.h> @@ -50,6 +52,7 @@ struct fsl_msi_feature { struct fsl_msi_cascade_data { struct fsl_msi *msi_data; int index; + int virq; }; static inline u32 fsl_msi_read(u32 __iomem *base, unsigned int reg) @@ -65,11 +68,24 @@ static void fsl_msi_end_irq(struct irq_data *d) { } +static void fsl_msi_print_chip(struct irq_data *irqd, struct seq_file *p) +{ + struct fsl_msi *msi_data = irqd->domain->host_data; + irq_hw_number_t hwirq = irqd_to_hwirq(irqd); + int cascade_virq, srs; + + srs = (hwirq >> msi_data->srs_shift) & MSI_SRS_MASK; + cascade_virq = msi_data->cascade_array[srs]->virq; + + seq_printf(p, " fsl-msi-%d", cascade_virq); +} + + static struct irq_chip fsl_msi_chip = { .irq_mask = mask_msi_irq, .irq_unmask = unmask_msi_irq, .irq_ack = fsl_msi_end_irq, - .name = "FSL-MSI", + .irq_print_chip = fsl_msi_print_chip, }; static int fsl_msi_host_map(struct irq_domain *h, unsigned int virq, @@ -175,7 +191,8 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) np = of_parse_phandle(hose->dn, "fsl,msi", 0); if (np) { if (of_device_is_compatible(np, "fsl,mpic-msi") || - of_device_is_compatible(np, "fsl,vmpic-msi")) + of_device_is_compatible(np, "fsl,vmpic-msi") || + of_device_is_compatible(np, "fsl,vmpic-msi-v4.3")) phandle = np->phandle; else { dev_err(&pdev->dev, @@ -234,40 +251,24 @@ out_free: return rc; } -static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) +static irqreturn_t fsl_msi_cascade(int irq, void *data) { - struct irq_chip *chip = irq_desc_get_chip(desc); - struct irq_data *idata = irq_desc_get_irq_data(desc); unsigned int cascade_irq; struct fsl_msi *msi_data; int msir_index = -1; u32 msir_value = 0; u32 intr_index; u32 have_shift = 0; - struct fsl_msi_cascade_data *cascade_data; + struct fsl_msi_cascade_data *cascade_data = data; + irqreturn_t ret = IRQ_NONE; - cascade_data = irq_get_handler_data(irq); msi_data = cascade_data->msi_data; - raw_spin_lock(&desc->lock); - if ((msi_data->feature & FSL_PIC_IP_MASK) == FSL_PIC_IP_IPIC) { - if (chip->irq_mask_ack) - chip->irq_mask_ack(idata); - else { - chip->irq_mask(idata); - chip->irq_ack(idata); - } - } - - if (unlikely(irqd_irq_inprogress(idata))) - goto unlock; - msir_index = cascade_data->index; if (msir_index >= NR_MSI_REG_MAX) cascade_irq = NO_IRQ; - irqd_set_chained_irq_inprogress(idata); switch (msi_data->feature & FSL_PIC_IP_MASK) { case FSL_PIC_IP_MPIC: msir_value = fsl_msi_read(msi_data->msi_regs, @@ -296,40 +297,32 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) cascade_irq = irq_linear_revmap(msi_data->irqhost, msi_hwirq(msi_data, msir_index, intr_index + have_shift)); - if (cascade_irq != NO_IRQ) + if (cascade_irq != NO_IRQ) { generic_handle_irq(cascade_irq); + ret = IRQ_HANDLED; + } have_shift += intr_index + 1; msir_value = msir_value >> (intr_index + 1); } - irqd_clr_chained_irq_inprogress(idata); - switch (msi_data->feature & FSL_PIC_IP_MASK) { - case FSL_PIC_IP_MPIC: - case FSL_PIC_IP_VMPIC: - chip->irq_eoi(idata); - break; - case FSL_PIC_IP_IPIC: - if (!irqd_irq_disabled(idata) && chip->irq_unmask) - chip->irq_unmask(idata); - break; - } -unlock: - raw_spin_unlock(&desc->lock); + return ret; } static int fsl_of_msi_remove(struct platform_device *ofdev) { struct fsl_msi *msi = platform_get_drvdata(ofdev); int virq, i; - struct fsl_msi_cascade_data *cascade_data; if (msi->list.prev != NULL) list_del(&msi->list); for (i = 0; i < NR_MSI_REG_MAX; i++) { - virq = msi->msi_virqs[i]; - if (virq != NO_IRQ) { - cascade_data = irq_get_handler_data(virq); - kfree(cascade_data); + if (msi->cascade_array[i]) { + virq = msi->cascade_array[i]->virq; + + BUG_ON(virq == NO_IRQ); + + free_irq(virq, msi->cascade_array[i]); + kfree(msi->cascade_array[i]); irq_dispose_mapping(virq); } } @@ -348,7 +341,7 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, int offset, int irq_index) { struct fsl_msi_cascade_data *cascade_data = NULL; - int virt_msir, i; + int virt_msir, i, ret; virt_msir = irq_of_parse_and_map(dev->dev.of_node, irq_index); if (virt_msir == NO_IRQ) { @@ -363,11 +356,18 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, return -ENOMEM; } irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class); - msi->msi_virqs[irq_index] = virt_msir; cascade_data->index = offset; cascade_data->msi_data = msi; - irq_set_handler_data(virt_msir, cascade_data); - irq_set_chained_handler(virt_msir, fsl_msi_cascade); + cascade_data->virq = virt_msir; + msi->cascade_array[irq_index] = cascade_data; + + ret = request_irq(virt_msir, fsl_msi_cascade, 0, + "fsl-msi-cascade", cascade_data); + if (ret) { + dev_err(&dev->dev, "failed to request_irq(%d), ret = %d\n", + virt_msir, ret); + return ret; + } /* Release the hwirqs corresponding to this MSI register */ for (i = 0; i < IRQS_PER_MSI_REG; i++) @@ -461,7 +461,8 @@ static int fsl_of_msi_probe(struct platform_device *dev) p = of_get_property(dev->dev.of_node, "msi-available-ranges", &len); - if (of_device_is_compatible(dev->dev.of_node, "fsl,mpic-msi-v4.3")) { + if (of_device_is_compatible(dev->dev.of_node, "fsl,mpic-msi-v4.3") || + of_device_is_compatible(dev->dev.of_node, "fsl,vmpic-msi-v4.3")) { msi->srs_shift = MSIIR1_SRS_SHIFT; msi->ibs_shift = MSIIR1_IBS_SHIFT; if (p) @@ -566,6 +567,10 @@ static const struct of_device_id fsl_of_msi_ids[] = { .compatible = "fsl,vmpic-msi", .data = &vmpic_msi_feature, }, + { + .compatible = "fsl,vmpic-msi-v4.3", + .data = &vmpic_msi_feature, + }, #endif {} }; diff --git a/arch/powerpc/sysdev/fsl_msi.h b/arch/powerpc/sysdev/fsl_msi.h index df9aa9fe0933..420cfcbdac01 100644 --- a/arch/powerpc/sysdev/fsl_msi.h +++ b/arch/powerpc/sysdev/fsl_msi.h @@ -27,6 +27,8 @@ #define FSL_PIC_IP_IPIC 0x00000002 #define FSL_PIC_IP_VMPIC 0x00000003 +struct fsl_msi_cascade_data; + struct fsl_msi { struct irq_domain *irqhost; @@ -37,7 +39,7 @@ struct fsl_msi { u32 srs_shift; /* Shift of the shared interrupt register select */ void __iomem *msi_regs; u32 feature; - int msi_virqs[NR_MSI_REG_MAX]; + struct fsl_msi_cascade_data *cascade_array[NR_MSI_REG_MAX]; struct msi_bitmap bitmap; diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index c5077673bd94..65d2ed4549e6 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -522,7 +522,8 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary) } else { /* For PCI read PROG to identify controller mode */ early_read_config_byte(hose, 0, 0, PCI_CLASS_PROG, &progif); - if ((progif & 1) == 1) + if ((progif & 1) && + !of_property_read_bool(dev, "fsl,pci-agent-force-enum")) goto no_bridge; } diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index be33c9768ea1..89cec0ed6a58 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -960,7 +960,7 @@ void mpic_set_vector(unsigned int virq, unsigned int vector) mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), vecpri); } -void mpic_set_destination(unsigned int virq, unsigned int cpuid) +static void mpic_set_destination(unsigned int virq, unsigned int cpuid) { struct mpic *mpic = mpic_from_irq(virq); unsigned int src = virq_to_hw(virq); diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index 2ff630267e9e..0c75214b6f92 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -20,32 +20,37 @@ int msi_bitmap_alloc_hwirqs(struct msi_bitmap *bmp, int num) int offset, order = get_count_order(num); spin_lock_irqsave(&bmp->lock, flags); - /* - * This is fast, but stricter than we need. We might want to add - * a fallback routine which does a linear search with no alignment. - */ - offset = bitmap_find_free_region(bmp->bitmap, bmp->irq_count, order); + + offset = bitmap_find_next_zero_area(bmp->bitmap, bmp->irq_count, 0, + num, (1 << order) - 1); + if (offset > bmp->irq_count) + goto err; + + bitmap_set(bmp->bitmap, offset, num); spin_unlock_irqrestore(&bmp->lock, flags); - pr_debug("msi_bitmap: allocated 0x%x (2^%d) at offset 0x%x\n", - num, order, offset); + pr_debug("msi_bitmap: allocated 0x%x at offset 0x%x\n", num, offset); return offset; +err: + spin_unlock_irqrestore(&bmp->lock, flags); + return -ENOMEM; } +EXPORT_SYMBOL(msi_bitmap_alloc_hwirqs); void msi_bitmap_free_hwirqs(struct msi_bitmap *bmp, unsigned int offset, unsigned int num) { unsigned long flags; - int order = get_count_order(num); - pr_debug("msi_bitmap: freeing 0x%x (2^%d) at offset 0x%x\n", - num, order, offset); + pr_debug("msi_bitmap: freeing 0x%x at offset 0x%x\n", + num, offset); spin_lock_irqsave(&bmp->lock, flags); - bitmap_release_region(bmp->bitmap, offset, order); + bitmap_clear(bmp->bitmap, offset, num); spin_unlock_irqrestore(&bmp->lock, flags); } +EXPORT_SYMBOL(msi_bitmap_free_hwirqs); void msi_bitmap_reserve_hwirq(struct msi_bitmap *bmp, unsigned int hwirq) { @@ -143,7 +148,7 @@ void msi_bitmap_free(struct msi_bitmap *bmp) #define check(x) \ if (!(x)) printk("msi_bitmap: test failed at line %d\n", __LINE__); -void __init test_basics(void) +static void __init test_basics(void) { struct msi_bitmap bmp; int i, size = 512; @@ -180,6 +185,15 @@ void __init test_basics(void) msi_bitmap_free_hwirqs(&bmp, size / 2, 1); check(msi_bitmap_alloc_hwirqs(&bmp, 1) == size / 2); + /* Check we get a naturally aligned offset */ + check(msi_bitmap_alloc_hwirqs(&bmp, 2) % 2 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 4) % 4 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 8) % 8 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 9) % 16 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 3) % 4 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 7) % 8 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 121) % 128 == 0); + msi_bitmap_free(&bmp); /* Clients may check bitmap == NULL for "not-allocated" */ @@ -188,7 +202,7 @@ void __init test_basics(void) kfree(bmp.bitmap); } -void __init test_of_node(void) +static void __init test_of_node(void) { u32 prop_data[] = { 10, 10, 25, 3, 40, 1, 100, 100, 200, 20 }; const char *expected_str = "0-9,20-24,28-39,41-99,220-255"; @@ -236,7 +250,7 @@ void __init test_of_node(void) kfree(bmp.bitmap); } -int __init msi_bitmap_selftest(void) +static int __init msi_bitmap_selftest(void) { printk(KERN_DEBUG "Running MSI bitmap self-tests ...\n"); diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c index c2dba7db71ad..026bbc3b2c47 100644 --- a/arch/powerpc/sysdev/mv64x60_dev.c +++ b/arch/powerpc/sysdev/mv64x60_dev.c @@ -23,7 +23,7 @@ /* These functions provide the necessary setup for the mv64x60 drivers. */ -static struct of_device_id __initdata of_mv64x60_devices[] = { +static const struct of_device_id of_mv64x60_devices[] __initconst = { { .compatible = "marvell,mv64306-devctrl", }, {} }; diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c index 5aaf86c03893..13e67d93a7c1 100644 --- a/arch/powerpc/sysdev/pmi.c +++ b/arch/powerpc/sysdev/pmi.c @@ -101,7 +101,7 @@ out: } -static struct of_device_id pmi_match[] = { +static const struct of_device_id pmi_match[] = { { .type = "ibm,pmi", .name = "ibm,pmi" }, { .type = "ibm,pmi" }, {}, diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index de8d9483bbe8..2fc4cf1b7557 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -155,6 +155,31 @@ static void icp_native_cause_ipi(int cpu, unsigned long data) icp_native_set_qirr(cpu, IPI_PRIORITY); } +/* + * Called when an interrupt is received on an off-line CPU to + * clear the interrupt, so that the CPU can go back to nap mode. + */ +void icp_native_flush_interrupt(void) +{ + unsigned int xirr = icp_native_get_xirr(); + unsigned int vec = xirr & 0x00ffffff; + + if (vec == XICS_IRQ_SPURIOUS) + return; + if (vec == XICS_IPI) { + /* Clear pending IPI */ + int cpu = smp_processor_id(); + kvmppc_set_host_ipi(cpu, 0); + icp_native_set_qirr(cpu, 0xff); + } else { + pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n", + vec); + xics_mask_unknown_vec(vec); + } + /* EOI the interrupt */ + icp_native_set_xirr(xirr); +} + void xics_wake_cpu(int cpu) { icp_native_set_qirr(cpu, IPI_PRIORITY); diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c index 83f943a8e0db..56f0524e47a6 100644 --- a/arch/powerpc/sysdev/xilinx_intc.c +++ b/arch/powerpc/sysdev/xilinx_intc.c @@ -265,7 +265,7 @@ static void __init xilinx_i8259_setup_cascade(void) static inline void xilinx_i8259_setup_cascade(void) { return; } #endif /* defined(CONFIG_PPC_I8259) */ -static struct of_device_id xilinx_intc_match[] __initconst = { +static const struct of_device_id xilinx_intc_match[] __initconst = { { .compatible = "xlnx,opb-intc-1.00.c", }, { .compatible = "xlnx,xps-intc-1.00.a", }, {} diff --git a/arch/powerpc/sysdev/xilinx_pci.c b/arch/powerpc/sysdev/xilinx_pci.c index 1453b0eed220..fea5667699ed 100644 --- a/arch/powerpc/sysdev/xilinx_pci.c +++ b/arch/powerpc/sysdev/xilinx_pci.c @@ -27,7 +27,7 @@ #define PCI_HOST_ENABLE_CMD PCI_COMMAND_SERR | PCI_COMMAND_PARITY | PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY -static struct of_device_id xilinx_pci_match[] = { +static const struct of_device_id xilinx_pci_match[] = { { .compatible = "xlnx,plbv46-pci-1.03.a", }, {} }; diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 05c78bb5f570..f2cf1f90295b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -58,6 +58,9 @@ config NO_IOPORT_MAP config PCI_QUIRKS def_bool n +config ARCH_SUPPORTS_UPROBES + def_bool 64BIT + config S390 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE @@ -97,6 +100,7 @@ config S390 select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS2 + select DYNAMIC_FTRACE if FUNCTION_TRACER select GENERIC_CLOCKEVENTS select GENERIC_CPU_DEVICES if !SMP select GENERIC_FIND_FIRST_BIT @@ -113,10 +117,11 @@ config S390 select HAVE_CMPXCHG_LOCAL select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK - select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE if 64BIT + select HAVE_DYNAMIC_FTRACE_WITH_REGS if 64BIT select HAVE_FTRACE_MCOUNT_RECORD - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_GRAPH_TRACER if 64BIT + select HAVE_FUNCTION_TRACER if 64BIT select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP @@ -496,8 +501,8 @@ config QDIO menuconfig PCI bool "PCI support" - default n depends on 64BIT + select HAVE_DMA_ATTRS select PCI_MSI help Enable PCI support. @@ -544,9 +549,6 @@ config HAS_DMA config NEED_SG_DMA_LENGTH def_bool PCI -config HAVE_DMA_ATTRS - def_bool PCI - config NEED_DMA_MAP_STATE def_bool PCI diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 874e6d6e9c5f..878e67973151 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -35,13 +35,16 @@ endif export LD_BFD -cflags-$(CONFIG_MARCH_G5) += -march=g5 -cflags-$(CONFIG_MARCH_Z900) += -march=z900 -cflags-$(CONFIG_MARCH_Z990) += -march=z990 -cflags-$(CONFIG_MARCH_Z9_109) += -march=z9-109 -cflags-$(CONFIG_MARCH_Z10) += -march=z10 -cflags-$(CONFIG_MARCH_Z196) += -march=z196 -cflags-$(CONFIG_MARCH_ZEC12) += -march=zEC12 +mflags-$(CONFIG_MARCH_G5) := -march=g5 +mflags-$(CONFIG_MARCH_Z900) := -march=z900 +mflags-$(CONFIG_MARCH_Z990) := -march=z990 +mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109 +mflags-$(CONFIG_MARCH_Z10) := -march=z10 +mflags-$(CONFIG_MARCH_Z196) := -march=z196 +mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12 + +aflags-y += $(mflags-y) +cflags-y += $(mflags-y) cflags-$(CONFIG_MARCH_G5_TUNE) += -mtune=g5 cflags-$(CONFIG_MARCH_Z900_TUNE) += -mtune=z900 diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 19ff956b752b..b5dce6544d76 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -15,11 +15,13 @@ #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES /* Fast-BCR without checkpoint synchronization */ -#define mb() do { asm volatile("bcr 14,0" : : : "memory"); } while (0) +#define __ASM_BARRIER "bcr 14,0\n" #else -#define mb() do { asm volatile("bcr 15,0" : : : "memory"); } while (0) +#define __ASM_BARRIER "bcr 15,0\n" #endif +#define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0) + #define rmb() mb() #define wmb() mb() #define read_barrier_depends() do { } while(0) diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index f65bd3634519..f8c196984853 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -8,8 +8,6 @@ #define _S390_CPUTIME_H #include <linux/types.h> -#include <linux/percpu.h> -#include <linux/spinlock.h> #include <asm/div64.h> @@ -18,6 +16,8 @@ typedef unsigned long long __nocast cputime_t; typedef unsigned long long __nocast cputime64_t; +#define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new) + static inline unsigned long __div(unsigned long long n, unsigned long base) { #ifndef CONFIG_64BIT @@ -165,28 +165,8 @@ static inline clock_t cputime64_to_clock_t(cputime64_t cputime) return clock; } -struct s390_idle_data { - int nohz_delay; - unsigned int sequence; - unsigned long long idle_count; - unsigned long long idle_time; - unsigned long long clock_idle_enter; - unsigned long long clock_idle_exit; - unsigned long long timer_idle_enter; - unsigned long long timer_idle_exit; -}; - -DECLARE_PER_CPU(struct s390_idle_data, s390_idle); - -cputime64_t s390_get_idle_time(int cpu); - -#define arch_idle_time(cpu) s390_get_idle_time(cpu) - -static inline int s390_nohz_delay(int cpu) -{ - return __get_cpu_var(s390_idle).nohz_delay != 0; -} +cputime64_t arch_cpu_idle_time(int cpu); -#define arch_needs_cpu(cpu) s390_nohz_delay(cpu) +#define arch_idle_time(cpu) arch_cpu_idle_time(cpu) #endif /* _S390_CPUTIME_H */ diff --git a/arch/s390/include/asm/dis.h b/arch/s390/include/asm/dis.h index 04a83f5773cd..60323c21938b 100644 --- a/arch/s390/include/asm/dis.h +++ b/arch/s390/include/asm/dis.h @@ -13,12 +13,13 @@ #define OPERAND_FPR 0x2 /* Operand printed as %fx */ #define OPERAND_AR 0x4 /* Operand printed as %ax */ #define OPERAND_CR 0x8 /* Operand printed as %cx */ -#define OPERAND_DISP 0x10 /* Operand printed as displacement */ -#define OPERAND_BASE 0x20 /* Operand printed as base register */ -#define OPERAND_INDEX 0x40 /* Operand printed as index register */ -#define OPERAND_PCREL 0x80 /* Operand printed as pc-relative symbol */ -#define OPERAND_SIGNED 0x100 /* Operand printed as signed value */ -#define OPERAND_LENGTH 0x200 /* Operand printed as length (+1) */ +#define OPERAND_VR 0x10 /* Operand printed as %vx */ +#define OPERAND_DISP 0x20 /* Operand printed as displacement */ +#define OPERAND_BASE 0x40 /* Operand printed as base register */ +#define OPERAND_INDEX 0x80 /* Operand printed as index register */ +#define OPERAND_PCREL 0x100 /* Operand printed as pc-relative symbol */ +#define OPERAND_SIGNED 0x200 /* Operand printed as signed value */ +#define OPERAND_LENGTH 0x400 /* Operand printed as length (+1) */ struct s390_operand { diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h index 3fbc67d9e197..709955ddaa4d 100644 --- a/arch/s390/include/asm/dma-mapping.h +++ b/arch/s390/include/asm/dma-mapping.h @@ -56,24 +56,35 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) return dma_addr == DMA_ERROR_CODE; } -static inline void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) +#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) + +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) { struct dma_map_ops *ops = get_dma_ops(dev); - void *ret; + void *cpu_addr; + + BUG_ON(!ops); - ret = ops->alloc(dev, size, dma_handle, flag, NULL); - debug_dma_alloc_coherent(dev, size, *dma_handle, ret); - return ret; + cpu_addr = ops->alloc(dev, size, dma_handle, flags, attrs); + debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); + + return cpu_addr; } -static inline void dma_free_coherent(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle) +#define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL) + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, + struct dma_attrs *attrs) { - struct dma_map_ops *dma_ops = get_dma_ops(dev); + struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!ops); debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); - dma_ops->free(dev, size, cpu_addr, dma_handle, NULL); + ops->free(dev, size, cpu_addr, dma_handle, attrs); } #endif /* _ASM_S390_DMA_MAPPING_H */ diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 78f4f8711d58..f6e43d39e3d8 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -102,6 +102,7 @@ #define HWCAP_S390_ETF3EH 256 #define HWCAP_S390_HIGH_GPRS 512 #define HWCAP_S390_TE 1024 +#define HWCAP_S390_VXRS 2048 /* * These are used to set parameters in the core dumps. @@ -225,6 +226,6 @@ int arch_setup_additional_pages(struct linux_binprm *, int); extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk -void *fill_cpu_elf_notes(void *ptr, struct save_area *sa); +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vxrs); #endif diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index bf246dae1367..3aef8afec336 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ extern void _mcount(void); +extern char ftrace_graph_caller_end; struct dyn_arch_ftrace { }; @@ -17,10 +18,8 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) #endif /* __ASSEMBLY__ */ -#ifdef CONFIG_64BIT -#define MCOUNT_INSN_SIZE 12 -#else -#define MCOUNT_INSN_SIZE 22 -#endif +#define MCOUNT_INSN_SIZE 18 + +#define ARCH_SUPPORTS_FTRACE_OPS 1 #endif /* _ASM_S390_FTRACE_H */ diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h new file mode 100644 index 000000000000..6af037f574b8 --- /dev/null +++ b/arch/s390/include/asm/idle.h @@ -0,0 +1,26 @@ +/* + * Copyright IBM Corp. 2014 + * + * Author: Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef _S390_IDLE_H +#define _S390_IDLE_H + +#include <linux/types.h> +#include <linux/device.h> + +struct s390_idle_data { + unsigned int sequence; + unsigned long long idle_count; + unsigned long long idle_time; + unsigned long long clock_idle_enter; + unsigned long long clock_idle_exit; + unsigned long long timer_idle_enter; + unsigned long long timer_idle_exit; +}; + +extern struct device_attribute dev_attr_idle_count; +extern struct device_attribute dev_attr_idle_time_us; + +#endif /* _S390_IDLE_H */ diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index c81661e756a0..ece606c2ee86 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -89,12 +89,12 @@ extern u32 ipl_flags; extern u32 dump_prefix_page; struct dump_save_areas { - struct save_area **areas; + struct save_area_ext **areas; int count; }; extern struct dump_save_areas dump_save_areas; -struct save_area *dump_save_area_create(int cpu); +struct save_area_ext *dump_save_area_create(int cpu); extern void do_reipl(void); extern void do_halt(void); diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index c4dd400a2791..e787cc1bff8f 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -51,6 +51,7 @@ enum interruption_class { IRQEXT_CMS, IRQEXT_CMC, IRQEXT_CMR, + IRQEXT_FTP, IRQIO_CIO, IRQIO_QAI, IRQIO_DAS, diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 4176dfe0fba1..98629173ce3b 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -84,6 +84,10 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr); int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); +int probe_is_prohibited_opcode(u16 *insn); +int probe_get_fixup_type(u16 *insn); +int probe_is_insn_relative_long(u16 *insn); + #define flush_insn_slot(p) do { } while (0) #endif /* _ASM_S390_KPROBES_H */ diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 4349197ab9df..6cc51fe84410 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -11,6 +11,7 @@ #include <linux/types.h> #include <asm/ptrace.h> #include <asm/cpu.h> +#include <asm/types.h> #ifdef CONFIG_32BIT @@ -31,6 +32,11 @@ struct save_area { u32 ctrl_regs[16]; } __packed; +struct save_area_ext { + struct save_area sa; + __vector128 vx_regs[32]; +}; + struct _lowcore { psw_t restart_psw; /* 0x0000 */ psw_t restart_old_psw; /* 0x0008 */ @@ -183,6 +189,11 @@ struct save_area { u64 ctrl_regs[16]; } __packed; +struct save_area_ext { + struct save_area sa; + __vector128 vx_regs[32]; +}; + struct _lowcore { __u8 pad_0x0000[0x0014-0x0000]; /* 0x0000 */ __u32 ipl_parmblock_ptr; /* 0x0014 */ @@ -310,7 +321,10 @@ struct _lowcore { /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ - __u8 pad_0x1000[0x11b8-0x1000]; /* 0x1000 */ + __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */ + + /* Pointer to vector register save area */ + __u64 vector_save_area_addr; /* 0x11b0 */ /* 64 bit extparam used for pfault/diag 250: defined by architecture */ __u64 ext_params2; /* 0x11B8 */ @@ -334,9 +348,10 @@ struct _lowcore { /* Transaction abort diagnostic block */ __u8 pgm_tdb[256]; /* 0x1800 */ + __u8 pad_0x1900[0x1c00-0x1900]; /* 0x1900 */ - /* align to the top of the prefix area */ - __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ + /* Software defined save area for vector registers */ + __u8 vector_save_area[1024]; /* 0x1c00 */ } __packed; #endif /* CONFIG_32BIT */ diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index 35f8ec185616..3027a5a72b74 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -38,7 +38,7 @@ struct mci { __u32 pm : 1; /* 22 psw program mask and cc validity */ __u32 ia : 1; /* 23 psw instruction address validity */ __u32 fa : 1; /* 24 failing storage address validity */ - __u32 : 1; /* 25 */ + __u32 vr : 1; /* 25 vector register validity */ __u32 ec : 1; /* 26 external damage code validity */ __u32 fp : 1; /* 27 floating point register validity */ __u32 gr : 1; /* 28 general register validity */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b7054356cc98..57c882761dea 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -217,7 +217,6 @@ extern unsigned long MODULES_END; */ /* Hardware bits in the page table entry */ -#define _PAGE_CO 0x100 /* HW Change-bit override */ #define _PAGE_PROTECT 0x200 /* HW read-only bit */ #define _PAGE_INVALID 0x400 /* HW invalid bit */ #define _PAGE_LARGE 0x800 /* Bit to mark a large pte */ @@ -234,8 +233,8 @@ extern unsigned long MODULES_END; #define __HAVE_ARCH_PTE_SPECIAL /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \ - _PAGE_DIRTY | _PAGE_YOUNG) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_DIRTY | \ + _PAGE_YOUNG) /* * handle_pte_fault uses pte_present, pte_none and pte_file to find out the @@ -354,7 +353,6 @@ extern unsigned long MODULES_END; #define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */ #define _REGION3_ENTRY_RO 0x200 /* page protection bit */ -#define _REGION3_ENTRY_CO 0x100 /* change-recording override */ /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL @@ -371,7 +369,6 @@ extern unsigned long MODULES_END; #define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */ #define _SEGMENT_ENTRY_SPLIT 0x0800 /* THP splitting bit */ #define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */ -#define _SEGMENT_ENTRY_CO 0x0100 /* change-recording override */ #define _SEGMENT_ENTRY_READ 0x0002 /* SW segment read bit */ #define _SEGMENT_ENTRY_WRITE 0x0001 /* SW segment write bit */ @@ -873,8 +870,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); } else { - if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1) - pte_val(entry) |= _PAGE_CO; *ptep = entry; } } @@ -1044,6 +1039,22 @@ static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep) : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address)); } +static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep) +{ + unsigned long pto = (unsigned long) ptep; + +#ifndef CONFIG_64BIT + /* pto in ESA mode must point to the start of the segment table */ + pto &= 0x7ffffc00; +#endif + /* Invalidate a range of ptes + global TLB flush of the ptes */ + do { + asm volatile( + " .insn rrf,0xb2210000,%2,%0,%1,0" + : "+a" (address), "+a" (nr) : "a" (pto) : "memory"); + } while (nr != 255); +} + static inline void ptep_flush_direct(struct mm_struct *mm, unsigned long address, pte_t *ptep) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index e568fc8a7250..d559bdb03d18 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -13,9 +13,11 @@ #define CIF_MCCK_PENDING 0 /* machine check handling is pending */ #define CIF_ASCE 1 /* user asce needs fixup / uaccess */ +#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ #define _CIF_MCCK_PENDING (1<<CIF_MCCK_PENDING) #define _CIF_ASCE (1<<CIF_ASCE) +#define _CIF_NOHZ_DELAY (1<<CIF_NOHZ_DELAY) #ifndef __ASSEMBLY__ @@ -43,6 +45,8 @@ static inline int test_cpu_flag(int flag) return !!(S390_lowcore.cpu_flags & (1U << flag)); } +#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY) + /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -113,6 +117,7 @@ struct thread_struct { int ri_signum; #ifdef CONFIG_64BIT unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ + __vector128 *vxrs; /* Vector register save area */ #endif }; @@ -285,7 +290,12 @@ static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) return (psw.addr - ilc) & mask; #endif } - + +/* + * Function to stop a processor until the next interrupt occurs + */ +void enabled_wait(void); + /* * Function to drop a processor into disabled wait state */ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 55d69dd7473c..be317feff7ac 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -161,6 +161,12 @@ static inline long regs_return_value(struct pt_regs *regs) return regs->gprs[2]; } +static inline void instruction_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + regs->psw.addr = val | PSW_ADDR_AMODE; +} + int regs_query_register_offset(const char *name); const char *regs_query_register_name(unsigned int offset); unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 089a49814c50..7736fdd72595 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -55,8 +55,8 @@ extern void detect_memory_memblock(void); #define MACHINE_FLAG_LPP (1UL << 13) #define MACHINE_FLAG_TOPOLOGY (1UL << 14) #define MACHINE_FLAG_TE (1UL << 15) -#define MACHINE_FLAG_RRBM (1UL << 16) #define MACHINE_FLAG_TLB_LC (1UL << 17) +#define MACHINE_FLAG_VX (1UL << 18) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) @@ -78,8 +78,8 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_LPP (0) #define MACHINE_HAS_TOPOLOGY (0) #define MACHINE_HAS_TE (0) -#define MACHINE_HAS_RRBM (0) #define MACHINE_HAS_TLB_LC (0) +#define MACHINE_HAS_VX (0) #else /* CONFIG_64BIT */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) @@ -91,8 +91,8 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_LPP (S390_lowcore.machine_flags & MACHINE_FLAG_LPP) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) -#define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM) #define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) +#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) #endif /* CONFIG_64BIT */ /* diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index bf9c823d4020..49576115dbb7 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -15,6 +15,7 @@ #define SIGP_SET_ARCHITECTURE 18 #define SIGP_COND_EMERGENCY_SIGNAL 19 #define SIGP_SENSE_RUNNING 21 +#define SIGP_STORE_ADDITIONAL_STATUS 23 /* SIGP condition codes */ #define SIGP_CC_ORDER_CODE_ACCEPTED 0 @@ -33,9 +34,10 @@ #ifndef __ASSEMBLY__ -static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status) +static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, + u32 *status) { - register unsigned int reg1 asm ("1") = parm; + register unsigned long reg1 asm ("1") = parm; int cc; asm volatile( diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 4f1307962a95..762d4f88af5a 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -29,7 +29,6 @@ extern int smp_find_processor_id(u16 address); extern int smp_store_status(int cpu); extern int smp_vcpu_scheduled(int cpu); extern void smp_yield_cpu(int cpu); -extern void smp_yield(void); extern void smp_cpu_set_polarization(int cpu, int val); extern int smp_cpu_get_polarization(int cpu); extern void smp_fill_possible_mask(void); @@ -50,7 +49,6 @@ static inline int smp_find_processor_id(u16 address) { return 0; } static inline int smp_store_status(int cpu) { return 0; } static inline int smp_vcpu_scheduled(int cpu) { return 1; } static inline void smp_yield_cpu(int cpu) { } -static inline void smp_yield(void) { } static inline void smp_fill_possible_mask(void) { } #endif /* CONFIG_SMP */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 96879f7ad6da..d6bdf906caa5 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -37,11 +37,17 @@ _raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new) * (the type definitions are in asm/spinlock_types.h) */ +void arch_lock_relax(unsigned int cpu); + void arch_spin_lock_wait(arch_spinlock_t *); int arch_spin_trylock_retry(arch_spinlock_t *); -void arch_spin_relax(arch_spinlock_t *); void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); +static inline void arch_spin_relax(arch_spinlock_t *lock) +{ + arch_lock_relax(lock->lock); +} + static inline u32 arch_spin_lockval(int cpu) { return ~cpu; @@ -64,11 +70,6 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp) _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL)); } -static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp) -{ - return _raw_compare_and_swap(&lp->lock, SPINLOCK_LOCKVAL, 0); -} - static inline void arch_spin_lock(arch_spinlock_t *lp) { if (!arch_spin_trylock_once(lp)) @@ -91,7 +92,13 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp) static inline void arch_spin_unlock(arch_spinlock_t *lp) { - arch_spin_tryrelease_once(lp); + typecheck(unsigned int, lp->lock); + asm volatile( + __ASM_BARRIER + "st %1,%0\n" + : "+Q" (lp->lock) + : "d" (0) + : "cc", "memory"); } static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) @@ -123,13 +130,12 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) */ #define arch_write_can_lock(x) ((x)->lock == 0) -extern void _raw_read_lock_wait(arch_rwlock_t *lp); -extern void _raw_read_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags); extern int _raw_read_trylock_retry(arch_rwlock_t *lp); -extern void _raw_write_lock_wait(arch_rwlock_t *lp); -extern void _raw_write_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags); extern int _raw_write_trylock_retry(arch_rwlock_t *lp); +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) + static inline int arch_read_trylock_once(arch_rwlock_t *rw) { unsigned int old = ACCESS_ONCE(rw->lock); @@ -144,16 +150,82 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw) _raw_compare_and_swap(&rw->lock, 0, 0x80000000)); } +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + +#define __RAW_OP_OR "lao" +#define __RAW_OP_AND "lan" +#define __RAW_OP_ADD "laa" + +#define __RAW_LOCK(ptr, op_val, op_string) \ +({ \ + unsigned int old_val; \ + \ + typecheck(unsigned int *, ptr); \ + asm volatile( \ + op_string " %0,%2,%1\n" \ + "bcr 14,0\n" \ + : "=d" (old_val), "+Q" (*ptr) \ + : "d" (op_val) \ + : "cc", "memory"); \ + old_val; \ +}) + +#define __RAW_UNLOCK(ptr, op_val, op_string) \ +({ \ + unsigned int old_val; \ + \ + typecheck(unsigned int *, ptr); \ + asm volatile( \ + "bcr 14,0\n" \ + op_string " %0,%2,%1\n" \ + : "=d" (old_val), "+Q" (*ptr) \ + : "d" (op_val) \ + : "cc", "memory"); \ + old_val; \ +}) + +extern void _raw_read_lock_wait(arch_rwlock_t *lp); +extern void _raw_write_lock_wait(arch_rwlock_t *lp, unsigned int prev); + static inline void arch_read_lock(arch_rwlock_t *rw) { - if (!arch_read_trylock_once(rw)) + unsigned int old; + + old = __RAW_LOCK(&rw->lock, 1, __RAW_OP_ADD); + if ((int) old < 0) _raw_read_lock_wait(rw); } -static inline void arch_read_lock_flags(arch_rwlock_t *rw, unsigned long flags) +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + __RAW_UNLOCK(&rw->lock, -1, __RAW_OP_ADD); +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + unsigned int old; + + old = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); + if (old != 0) + _raw_write_lock_wait(rw, old); + rw->owner = SPINLOCK_LOCKVAL; +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + rw->owner = 0; + __RAW_UNLOCK(&rw->lock, 0x7fffffff, __RAW_OP_AND); +} + +#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +extern void _raw_read_lock_wait(arch_rwlock_t *lp); +extern void _raw_write_lock_wait(arch_rwlock_t *lp); + +static inline void arch_read_lock(arch_rwlock_t *rw) { if (!arch_read_trylock_once(rw)) - _raw_read_lock_wait_flags(rw, flags); + _raw_read_lock_wait(rw); } static inline void arch_read_unlock(arch_rwlock_t *rw) @@ -169,19 +241,24 @@ static inline void arch_write_lock(arch_rwlock_t *rw) { if (!arch_write_trylock_once(rw)) _raw_write_lock_wait(rw); -} - -static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags) -{ - if (!arch_write_trylock_once(rw)) - _raw_write_lock_wait_flags(rw, flags); + rw->owner = SPINLOCK_LOCKVAL; } static inline void arch_write_unlock(arch_rwlock_t *rw) { - _raw_compare_and_swap(&rw->lock, 0x80000000, 0); + typecheck(unsigned int, rw->lock); + + rw->owner = 0; + asm volatile( + __ASM_BARRIER + "st %1,%0\n" + : "+Q" (rw->lock) + : "d" (0) + : "cc", "memory"); } +#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + static inline int arch_read_trylock(arch_rwlock_t *rw) { if (!arch_read_trylock_once(rw)) @@ -191,12 +268,20 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) static inline int arch_write_trylock(arch_rwlock_t *rw) { - if (!arch_write_trylock_once(rw)) - return _raw_write_trylock_retry(rw); + if (!arch_write_trylock_once(rw) && !_raw_write_trylock_retry(rw)) + return 0; + rw->owner = SPINLOCK_LOCKVAL; return 1; } -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() +static inline void arch_read_relax(arch_rwlock_t *rw) +{ + arch_lock_relax(rw->owner); +} + +static inline void arch_write_relax(arch_rwlock_t *rw) +{ + arch_lock_relax(rw->owner); +} #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h index b2cd6ff7c2c5..d84b6939237c 100644 --- a/arch/s390/include/asm/spinlock_types.h +++ b/arch/s390/include/asm/spinlock_types.h @@ -13,6 +13,7 @@ typedef struct { typedef struct { unsigned int lock; + unsigned int owner; } arch_rwlock_t; #define __ARCH_RW_LOCK_UNLOCKED { 0 } diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index 18ea9e3f8142..2542a7e4c8b4 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -103,6 +103,61 @@ static inline void restore_fp_regs(freg_t *fprs) asm volatile("ld 15,%0" : : "Q" (fprs[15])); } +static inline void save_vx_regs(__vector128 *vxrs) +{ + typedef struct { __vector128 _[__NUM_VXRS]; } addrtype; + + asm volatile( + " la 1,%0\n" + " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ + " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ + : "=Q" (*(addrtype *) vxrs) : : "1"); +} + +static inline void save_vx_regs_safe(__vector128 *vxrs) +{ + unsigned long cr0, flags; + + flags = arch_local_irq_save(); + __ctl_store(cr0, 0, 0); + __ctl_set_bit(0, 17); + __ctl_set_bit(0, 18); + save_vx_regs(vxrs); + __ctl_load(cr0, 0, 0); + arch_local_irq_restore(flags); +} + +static inline void restore_vx_regs(__vector128 *vxrs) +{ + typedef struct { __vector128 _[__NUM_VXRS]; } addrtype; + + asm volatile( + " la 1,%0\n" + " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ + " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ + : : "Q" (*(addrtype *) vxrs) : "1"); +} + +static inline void save_fp_vx_regs(struct task_struct *task) +{ +#ifdef CONFIG_64BIT + if (task->thread.vxrs) + save_vx_regs(task->thread.vxrs); + else +#endif + save_fp_regs(task->thread.fp_regs.fprs); +} + +static inline void restore_fp_vx_regs(struct task_struct *task) +{ +#ifdef CONFIG_64BIT + if (task->thread.vxrs) + restore_vx_regs(task->thread.vxrs); + else +#endif + restore_fp_regs(task->thread.fp_regs.fprs); +} + static inline void save_access_regs(unsigned int *acrs) { typedef struct { int _[NUM_ACRS]; } acrstype; @@ -120,16 +175,16 @@ static inline void restore_access_regs(unsigned int *acrs) #define switch_to(prev,next,last) do { \ if (prev->mm) { \ save_fp_ctl(&prev->thread.fp_regs.fpc); \ - save_fp_regs(prev->thread.fp_regs.fprs); \ + save_fp_vx_regs(prev); \ save_access_regs(&prev->thread.acrs[0]); \ save_ri_cb(prev->thread.ri_cb); \ } \ if (next->mm) { \ + update_cr_regs(next); \ restore_fp_ctl(&next->thread.fp_regs.fpc); \ - restore_fp_regs(next->thread.fp_regs.fprs); \ + restore_fp_vx_regs(next); \ restore_access_regs(&next->thread.acrs[0]); \ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ - update_cr_regs(next); \ } \ prev = __switch_to(prev,next); \ } while (0) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index b833e9c0bfbf..4d62fd5b56e5 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -84,11 +84,13 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ #define TIF_SECCOMP 5 /* secure computing */ #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ +#define TIF_UPROBE 7 /* breakpointed or single-stepping */ #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ #define TIF_SINGLE_STEP 19 /* This task is single stepped */ #define TIF_BLOCK_STEP 20 /* This task is block stepped */ +#define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */ #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) @@ -97,6 +99,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1<<TIF_SECCOMP) #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) +#define _TIF_UPROBE (1<<TIF_UPROBE) #define _TIF_31BIT (1<<TIF_31BIT) #define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP) diff --git a/arch/s390/include/asm/uprobes.h b/arch/s390/include/asm/uprobes.h new file mode 100644 index 000000000000..1411dff7fea7 --- /dev/null +++ b/arch/s390/include/asm/uprobes.h @@ -0,0 +1,42 @@ +/* + * User-space Probes (UProbes) for s390 + * + * Copyright IBM Corp. 2014 + * Author(s): Jan Willeke, + */ + +#ifndef _ASM_UPROBES_H +#define _ASM_UPROBES_H + +#include <linux/notifier.h> + +typedef u16 uprobe_opcode_t; + +#define UPROBE_XOL_SLOT_BYTES 256 /* cache aligned */ + +#define UPROBE_SWBP_INSN 0x0002 +#define UPROBE_SWBP_INSN_SIZE 2 + +struct arch_uprobe { + union{ + uprobe_opcode_t insn[3]; + uprobe_opcode_t ixol[3]; + }; + unsigned int saved_per : 1; + unsigned int saved_int_code; +}; + +struct arch_uprobe_task { +}; + +int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, + unsigned long addr); +int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); +int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); +bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); +int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, + void *data); +void arch_uprobe_abort_xol(struct arch_uprobe *ap, struct pt_regs *regs); +unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline, + struct pt_regs *regs); +#endif /* _ASM_UPROBES_H */ diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index bc9746a7d47c..a62526d09201 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -22,13 +22,17 @@ struct vdso_data { __u64 xtime_tod_stamp; /* TOD clock for xtime 0x08 */ __u64 xtime_clock_sec; /* Kernel time 0x10 */ __u64 xtime_clock_nsec; /* 0x18 */ - __u64 wtom_clock_sec; /* Wall to monotonic clock 0x20 */ - __u64 wtom_clock_nsec; /* 0x28 */ - __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */ - __u32 tz_dsttime; /* Type of dst correction 0x34 */ - __u32 ectg_available; /* ECTG instruction present 0x38 */ - __u32 tk_mult; /* Mult. used for xtime_nsec 0x3c */ - __u32 tk_shift; /* Shift used for xtime_nsec 0x40 */ + __u64 xtime_coarse_sec; /* Coarse kernel time 0x20 */ + __u64 xtime_coarse_nsec; /* 0x28 */ + __u64 wtom_clock_sec; /* Wall to monotonic clock 0x30 */ + __u64 wtom_clock_nsec; /* 0x38 */ + __u64 wtom_coarse_sec; /* Coarse wall to monotonic 0x40 */ + __u64 wtom_coarse_nsec; /* 0x48 */ + __u32 tz_minuteswest; /* Minutes west of Greenwich 0x50 */ + __u32 tz_dsttime; /* Type of dst correction 0x54 */ + __u32 ectg_available; /* ECTG instruction present 0x58 */ + __u32 tk_mult; /* Mult. used for xtime_nsec 0x5c */ + __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */ }; struct vdso_per_cpu_data { diff --git a/arch/s390/include/asm/vtimer.h b/arch/s390/include/asm/vtimer.h index bfe25d513ad2..10a179af62d8 100644 --- a/arch/s390/include/asm/vtimer.h +++ b/arch/s390/include/asm/vtimer.h @@ -28,6 +28,4 @@ extern int del_virt_timer(struct vtimer_list *timer); extern void init_cpu_vtimer(void); extern void vtime_init(void); -extern void vtime_stop_cpu(void); - #endif /* _ASM_S390_TIMER_H */ diff --git a/arch/s390/include/uapi/asm/sigcontext.h b/arch/s390/include/uapi/asm/sigcontext.h index b30de9c01bbe..5f0b8d7ddb0b 100644 --- a/arch/s390/include/uapi/asm/sigcontext.h +++ b/arch/s390/include/uapi/asm/sigcontext.h @@ -7,10 +7,14 @@ #define _ASM_S390_SIGCONTEXT_H #include <linux/compiler.h> +#include <linux/types.h> -#define __NUM_GPRS 16 -#define __NUM_FPRS 16 -#define __NUM_ACRS 16 +#define __NUM_GPRS 16 +#define __NUM_FPRS 16 +#define __NUM_ACRS 16 +#define __NUM_VXRS 32 +#define __NUM_VXRS_LOW 16 +#define __NUM_VXRS_HIGH 16 #ifndef __s390x__ @@ -59,6 +63,16 @@ typedef struct _s390_fp_regs fpregs; } _sigregs; +typedef struct +{ +#ifndef __s390x__ + unsigned long gprs_high[__NUM_GPRS]; +#endif + unsigned long long vxrs_low[__NUM_VXRS_LOW]; + __vector128 vxrs_high[__NUM_VXRS_HIGH]; + unsigned char __reserved[128]; +} _sigregs_ext; + struct sigcontext { unsigned long oldmask[_SIGCONTEXT_NSIG_WORDS]; diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h index 038f2b9178a4..3c3951e3415b 100644 --- a/arch/s390/include/uapi/asm/types.h +++ b/arch/s390/include/uapi/asm/types.h @@ -17,6 +17,10 @@ typedef unsigned long addr_t; typedef __signed__ long saddr_t; +typedef struct { + __u32 u[4]; +} __vector128; + #endif /* __ASSEMBLY__ */ #endif /* _UAPI_S390_TYPES_H */ diff --git a/arch/s390/include/uapi/asm/ucontext.h b/arch/s390/include/uapi/asm/ucontext.h index 3e077b2a4705..64a69aa5dde0 100644 --- a/arch/s390/include/uapi/asm/ucontext.h +++ b/arch/s390/include/uapi/asm/ucontext.h @@ -7,10 +7,15 @@ #ifndef _ASM_S390_UCONTEXT_H #define _ASM_S390_UCONTEXT_H -#define UC_EXTENDED 0x00000001 - -#ifndef __s390x__ +#define UC_GPRS_HIGH 1 /* uc_mcontext_ext has valid high gprs */ +#define UC_VXRS 2 /* uc_mcontext_ext has valid vector regs */ +/* + * The struct ucontext_extended describes how the registers are stored + * on a rt signal frame. Please note that the structure is not fixed, + * if new CPU registers are added to the user state the size of the + * struct ucontext_extended will increase. + */ struct ucontext_extended { unsigned long uc_flags; struct ucontext *uc_link; @@ -19,11 +24,9 @@ struct ucontext_extended { sigset_t uc_sigmask; /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */ unsigned char __unused[128 - sizeof(sigset_t)]; - unsigned long uc_gprs_high[16]; + _sigregs_ext uc_mcontext_ext; }; -#endif - struct ucontext { unsigned long uc_flags; struct ucontext *uc_link; diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index a95c4ca99617..204c43a4c245 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -28,7 +28,7 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w -obj-y := traps.o time.o process.o base.o early.o setup.o vtime.o +obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o @@ -52,11 +52,9 @@ obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y) obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) -obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_UPROBES) += uprobes.o ifdef CONFIG_64BIT obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o \ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index afe1715a4eb7..ef279a136801 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -9,7 +9,7 @@ #include <linux/kbuild.h> #include <linux/kvm_host.h> #include <linux/sched.h> -#include <asm/cputime.h> +#include <asm/idle.h> #include <asm/vdso.h> #include <asm/pgtable.h> @@ -62,8 +62,12 @@ int main(void) DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp)); DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec)); DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); + DEFINE(__VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); + DEFINE(__VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec)); DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); + DEFINE(__VDSO_WTOM_CRS_SEC, offsetof(struct vdso_data, wtom_coarse_sec)); + DEFINE(__VDSO_WTOM_CRS_NSEC, offsetof(struct vdso_data, wtom_coarse_nsec)); DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available)); DEFINE(__VDSO_TK_MULT, offsetof(struct vdso_data, tk_mult)); @@ -73,8 +77,11 @@ int main(void) /* constants used by the vdso */ DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); + DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); + DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC); BLANK(); /* idle data offsets */ DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter)); diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 70d4b7c4beaa..a0a886c04977 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -50,6 +50,14 @@ typedef struct _s390_fp_regs32 fpregs; } _sigregs32; +typedef struct +{ + __u32 gprs_high[__NUM_GPRS]; + __u64 vxrs_low[__NUM_VXRS_LOW]; + __vector128 vxrs_high[__NUM_VXRS_HIGH]; + __u8 __reserved[128]; +} _sigregs_ext32; + #define _SIGCONTEXT_NSIG32 64 #define _SIGCONTEXT_NSIG_BPW32 32 #define __SIGNAL_FRAMESIZE32 96 @@ -72,6 +80,7 @@ struct ucontext32 { compat_sigset_t uc_sigmask; /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */ unsigned char __unused[128 - sizeof(compat_sigset_t)]; + _sigregs_ext32 uc_mcontext_ext; }; struct stat64_emu31; diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 598b0b42668b..009f5eb11125 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -36,17 +36,16 @@ typedef struct struct sigcontext32 sc; _sigregs32 sregs; int signo; - __u32 gprs_high[NUM_GPRS]; - __u8 retcode[S390_SYSCALL_SIZE]; + _sigregs_ext32 sregs_ext; + __u16 svc_insn; /* Offset of svc_insn is NOT fixed! */ } sigframe32; typedef struct { __u8 callee_used_stack[__SIGNAL_FRAMESIZE32]; - __u8 retcode[S390_SYSCALL_SIZE]; + __u16 svc_insn; compat_siginfo_t info; struct ucontext32 uc; - __u32 gprs_high[NUM_GPRS]; } rt_sigframe32; int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) @@ -151,6 +150,38 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) return err ? -EFAULT : 0; } +/* Store registers needed to create the signal frame */ +static void store_sigregs(void) +{ + int i; + + save_access_regs(current->thread.acrs); + save_fp_ctl(¤t->thread.fp_regs.fpc); + if (current->thread.vxrs) { + save_vx_regs(current->thread.vxrs); + for (i = 0; i < __NUM_FPRS; i++) + current->thread.fp_regs.fprs[i] = + *(freg_t *)(current->thread.vxrs + i); + } else + save_fp_regs(current->thread.fp_regs.fprs); +} + +/* Load registers after signal return */ +static void load_sigregs(void) +{ + int i; + + restore_access_regs(current->thread.acrs); + /* restore_fp_ctl is done in restore_sigregs */ + if (current->thread.vxrs) { + for (i = 0; i < __NUM_FPRS; i++) + *(freg_t *)(current->thread.vxrs + i) = + current->thread.fp_regs.fprs[i]; + restore_vx_regs(current->thread.vxrs); + } else + restore_fp_regs(current->thread.fp_regs.fprs); +} + static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) { _sigregs32 user_sregs; @@ -163,11 +194,8 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) (__u32)(regs->psw.mask & PSW_MASK_BA); for (i = 0; i < NUM_GPRS; i++) user_sregs.regs.gprs[i] = (__u32) regs->gprs[i]; - save_access_regs(current->thread.acrs); memcpy(&user_sregs.regs.acrs, current->thread.acrs, sizeof(user_sregs.regs.acrs)); - save_fp_ctl(¤t->thread.fp_regs.fpc); - save_fp_regs(current->thread.fp_regs.fprs); memcpy(&user_sregs.fpregs, ¤t->thread.fp_regs, sizeof(user_sregs.fpregs)); if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32))) @@ -207,37 +235,67 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) regs->gprs[i] = (__u64) user_sregs.regs.gprs[i]; memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, sizeof(current->thread.acrs)); - restore_access_regs(current->thread.acrs); memcpy(¤t->thread.fp_regs, &user_sregs.fpregs, sizeof(current->thread.fp_regs)); - restore_fp_regs(current->thread.fp_regs.fprs); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ return 0; } -static int save_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) +static int save_sigregs_ext32(struct pt_regs *regs, + _sigregs_ext32 __user *sregs_ext) { __u32 gprs_high[NUM_GPRS]; + __u64 vxrs[__NUM_VXRS_LOW]; int i; + /* Save high gprs to signal stack */ for (i = 0; i < NUM_GPRS; i++) gprs_high[i] = regs->gprs[i] >> 32; - if (__copy_to_user(uregs, &gprs_high, sizeof(gprs_high))) + if (__copy_to_user(&sregs_ext->gprs_high, &gprs_high, + sizeof(sregs_ext->gprs_high))) return -EFAULT; + + /* Save vector registers to signal stack */ + if (current->thread.vxrs) { + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1); + if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, + sizeof(sregs_ext->vxrs_low)) || + __copy_to_user(&sregs_ext->vxrs_high, + current->thread.vxrs + __NUM_VXRS_LOW, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + } return 0; } -static int restore_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) +static int restore_sigregs_ext32(struct pt_regs *regs, + _sigregs_ext32 __user *sregs_ext) { __u32 gprs_high[NUM_GPRS]; + __u64 vxrs[__NUM_VXRS_LOW]; int i; - if (__copy_from_user(&gprs_high, uregs, sizeof(gprs_high))) + /* Restore high gprs from signal stack */ + if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high, + sizeof(&sregs_ext->gprs_high))) return -EFAULT; for (i = 0; i < NUM_GPRS; i++) *(__u32 *)®s->gprs[i] = gprs_high[i]; + + /* Restore vector registers from signal stack */ + if (current->thread.vxrs) { + if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, + sizeof(sregs_ext->vxrs_low)) || + __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW, + &sregs_ext->vxrs_high, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + for (i = 0; i < __NUM_VXRS_LOW; i++) + *((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i]; + } return 0; } @@ -252,8 +310,9 @@ COMPAT_SYSCALL_DEFINE0(sigreturn) set_current_blocked(&set); if (restore_sigregs32(regs, &frame->sregs)) goto badframe; - if (restore_sigregs_gprs_high(regs, frame->gprs_high)) + if (restore_sigregs_ext32(regs, &frame->sregs_ext)) goto badframe; + load_sigregs(); return regs->gprs[2]; badframe: force_sig(SIGSEGV, current); @@ -269,12 +328,13 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn) if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; set_current_blocked(&set); + if (compat_restore_altstack(&frame->uc.uc_stack)) + goto badframe; if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) goto badframe; - if (restore_sigregs_gprs_high(regs, frame->gprs_high)) + if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) goto badframe; - if (compat_restore_altstack(&frame->uc.uc_stack)) - goto badframe; + load_sigregs(); return regs->gprs[2]; badframe: force_sig(SIGSEGV, current); @@ -324,37 +384,64 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { int sig = ksig->sig; - sigframe32 __user *frame = get_sigframe(&ksig->ka, regs, sizeof(sigframe32)); - + sigframe32 __user *frame; + struct sigcontext32 sc; + unsigned long restorer; + size_t frame_size; + + /* + * gprs_high are always present for 31-bit compat tasks. + * The space for vector registers is only allocated if + * the machine supports it + */ + frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved); + if (!MACHINE_HAS_VX) + frame_size -= sizeof(frame->sregs_ext.vxrs_low) + + sizeof(frame->sregs_ext.vxrs_high); + frame = get_sigframe(&ksig->ka, regs, frame_size); if (frame == (void __user *) -1UL) return -EFAULT; - if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32)) + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) + return -EFAULT; + + /* Create struct sigcontext32 on the signal stack */ + memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32); + sc.sregs = (__u32)(unsigned long __force) &frame->sregs; + if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc))) return -EFAULT; + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create _sigregs32 on the signal stack */ if (save_sigregs32(regs, &frame->sregs)) return -EFAULT; - if (save_sigregs_gprs_high(regs, frame->gprs_high)) + + /* Place signal number on stack to allow backtrace from handler. */ + if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) return -EFAULT; - if (__put_user((unsigned long) &frame->sregs, &frame->sc.sregs)) + + /* Create _sigregs_ext32 on the signal stack */ + if (save_sigregs_ext32(regs, &frame->sregs_ext)) return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ksig->ka.sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64 __force) ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; + restorer = (unsigned long __force) + ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, - (u16 __force __user *)(frame->retcode))) + /* Signal frames without vectors registers are short ! */ + __u16 __user *svc = (void *) frame + frame_size - 2; + if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc)) return -EFAULT; + restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE; } - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) - return -EFAULT; - /* Set up registers for signal handler */ + regs->gprs[14] = restorer; regs->gprs[15] = (__force __u64) frame; /* Force 31 bit amode and default user address space control. */ regs->psw.mask = PSW_MASK_BA | @@ -375,50 +462,69 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, regs->gprs[6] = task_thread_info(current)->last_break; } - /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) - return -EFAULT; return 0; } static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - int err = 0; - rt_sigframe32 __user *frame = get_sigframe(&ksig->ka, regs, sizeof(rt_sigframe32)); - + rt_sigframe32 __user *frame; + unsigned long restorer; + size_t frame_size; + u32 uc_flags; + + frame_size = sizeof(*frame) - + sizeof(frame->uc.uc_mcontext_ext.__reserved); + /* + * gprs_high are always present for 31-bit compat tasks. + * The space for vector registers is only allocated if + * the machine supports it + */ + uc_flags = UC_GPRS_HIGH; + if (MACHINE_HAS_VX) { + if (current->thread.vxrs) + uc_flags |= UC_VXRS; + } else + frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) + + sizeof(frame->uc.uc_mcontext_ext.vxrs_high); + frame = get_sigframe(&ksig->ka, regs, frame_size); if (frame == (void __user *) -1UL) return -EFAULT; - if (copy_siginfo_to_user32(&frame->info, &ksig->info)) - return -EFAULT; - - /* Create the ucontext. */ - err |= __put_user(UC_EXTENDED, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]); - err |= save_sigregs32(regs, &frame->uc.uc_mcontext); - err |= save_sigregs_gprs_high(regs, frame->gprs_high); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ksig->ka.sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64 __force) ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; + restorer = (unsigned long __force) + ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __force __user *)(frame->retcode))) + __u16 __user *svc = &frame->svc_insn; + if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc)) return -EFAULT; + restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE; } - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) + /* Create siginfo on the signal stack */ + if (copy_siginfo_to_user32(&frame->info, &ksig->info)) + return -EFAULT; + + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create ucontext on the signal stack. */ + if (__put_user(uc_flags, &frame->uc.uc_flags) || + __put_user(0, &frame->uc.uc_link) || + __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || + save_sigregs32(regs, &frame->uc.uc_mcontext) || + __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) || + save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) return -EFAULT; /* Set up registers for signal handler */ + regs->gprs[14] = restorer; regs->gprs[15] = (__force __u64) frame; /* Force 31 bit amode and default user address space control. */ regs->psw.mask = PSW_MASK_BA | diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index a3b9150e6802..9f73c8059022 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -46,9 +46,9 @@ struct dump_save_areas dump_save_areas; /* * Allocate and add a save area for a CPU */ -struct save_area *dump_save_area_create(int cpu) +struct save_area_ext *dump_save_area_create(int cpu) { - struct save_area **save_areas, *save_area; + struct save_area_ext **save_areas, *save_area; save_area = kmalloc(sizeof(*save_area), GFP_KERNEL); if (!save_area) @@ -386,9 +386,45 @@ static void *nt_s390_prefix(void *ptr, struct save_area *sa) } /* + * Initialize vxrs high note (full 128 bit VX registers 16-31) + */ +static void *nt_s390_vx_high(void *ptr, __vector128 *vx_regs) +{ + return nt_init(ptr, NT_S390_VXRS_HIGH, &vx_regs[16], + 16 * sizeof(__vector128), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize vxrs low note (lower halves of VX registers 0-15) + */ +static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs) +{ + Elf64_Nhdr *note; + u64 len; + int i; + + note = (Elf64_Nhdr *)ptr; + note->n_namesz = strlen(KEXEC_CORE_NOTE_NAME) + 1; + note->n_descsz = 16 * 8; + note->n_type = NT_S390_VXRS_LOW; + len = sizeof(Elf64_Nhdr); + + memcpy(ptr + len, KEXEC_CORE_NOTE_NAME, note->n_namesz); + len = roundup(len + note->n_namesz, 4); + + ptr += len; + /* Copy lower halves of SIMD registers 0-15 */ + for (i = 0; i < 16; i++) { + memcpy(ptr, &vx_regs[i], 8); + ptr += 8; + } + return ptr; +} + +/* * Fill ELF notes for one CPU with save area registers */ -void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vx_regs) { ptr = nt_prstatus(ptr, sa); ptr = nt_fpregset(ptr, sa); @@ -397,6 +433,10 @@ void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) ptr = nt_s390_tod_preg(ptr, sa); ptr = nt_s390_ctrs(ptr, sa); ptr = nt_s390_prefix(ptr, sa); + if (MACHINE_HAS_VX && vx_regs) { + ptr = nt_s390_vx_low(ptr, vx_regs); + ptr = nt_s390_vx_high(ptr, vx_regs); + } return ptr; } @@ -484,7 +524,7 @@ static int get_cpu_cnt(void) int i, cpus = 0; for (i = 0; i < dump_save_areas.count; i++) { - if (dump_save_areas.areas[i]->pref_reg == 0) + if (dump_save_areas.areas[i]->sa.pref_reg == 0) continue; cpus++; } @@ -530,17 +570,17 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) */ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) { - struct save_area *sa; + struct save_area_ext *sa_ext; void *ptr_start = ptr; int i; ptr = nt_prpsinfo(ptr); for (i = 0; i < dump_save_areas.count; i++) { - sa = dump_save_areas.areas[i]; - if (sa->pref_reg == 0) + sa_ext = dump_save_areas.areas[i]; + if (sa_ext->sa.pref_reg == 0) continue; - ptr = fill_cpu_elf_notes(ptr, sa); + ptr = fill_cpu_elf_notes(ptr, &sa_ext->sa, sa_ext->vx_regs); } ptr = nt_vmcoreinfo(ptr); memset(phdr, 0, sizeof(*phdr)); @@ -581,7 +621,7 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) mem_chunk_cnt = get_mem_chunk_cnt(); - alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + + alloc_size = 0x1000 + get_cpu_cnt() * 0x4a0 + mem_chunk_cnt * sizeof(Elf64_Phdr); hdr = kzalloc_panic(alloc_size); /* Init elf header */ diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 993efe6a887c..f3762937dd82 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -60,6 +60,11 @@ enum { A_28, /* Access reg. starting at position 28 */ C_8, /* Control reg. starting at position 8 */ C_12, /* Control reg. starting at position 12 */ + V_8, /* Vector reg. starting at position 8, extension bit at 36 */ + V_12, /* Vector reg. starting at position 12, extension bit at 37 */ + V_16, /* Vector reg. starting at position 16, extension bit at 38 */ + V_32, /* Vector reg. starting at position 32, extension bit at 39 */ + W_12, /* Vector reg. at bit 12, extension at bit 37, used as index */ B_16, /* Base register starting at position 16 */ B_32, /* Base register starting at position 32 */ X_12, /* Index register starting at position 12 */ @@ -82,6 +87,8 @@ enum { U8_24, /* 8 bit unsigned value starting at 24 */ U8_32, /* 8 bit unsigned value starting at 32 */ I8_8, /* 8 bit signed value starting at 8 */ + I8_16, /* 8 bit signed value starting at 16 */ + I8_24, /* 8 bit signed value starting at 24 */ I8_32, /* 8 bit signed value starting at 32 */ J12_12, /* PC relative offset at 12 */ I16_16, /* 16 bit signed value starting at 16 */ @@ -96,6 +103,9 @@ enum { U32_16, /* 32 bit unsigned value starting at 16 */ M_16, /* 4 bit optional mask starting at 16 */ M_20, /* 4 bit optional mask starting at 20 */ + M_24, /* 4 bit optional mask starting at 24 */ + M_28, /* 4 bit optional mask starting at 28 */ + M_32, /* 4 bit optional mask starting at 32 */ RO_28, /* optional GPR starting at position 28 */ }; @@ -130,7 +140,7 @@ enum { INSTR_RSY_RDRM, INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD, INSTR_RS_RURD, - INSTR_RXE_FRRD, INSTR_RXE_RRRD, + INSTR_RXE_FRRD, INSTR_RXE_RRRD, INSTR_RXE_RRRDM, INSTR_RXF_FRRDF, INSTR_RXY_FRRD, INSTR_RXY_RRRD, INSTR_RXY_URRD, INSTR_RX_FRRD, INSTR_RX_RRRD, INSTR_RX_URRD, @@ -143,6 +153,17 @@ enum { INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD, INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3, INSTR_S_00, INSTR_S_RD, + INSTR_VRI_V0IM, INSTR_VRI_V0I0, INSTR_VRI_V0IIM, INSTR_VRI_VVIM, + INSTR_VRI_VVV0IM, INSTR_VRI_VVV0I0, INSTR_VRI_VVIMM, + INSTR_VRR_VV00MMM, INSTR_VRR_VV000MM, INSTR_VRR_VV0000M, + INSTR_VRR_VV00000, INSTR_VRR_VVV0M0M, INSTR_VRR_VV00M0M, + INSTR_VRR_VVV000M, INSTR_VRR_VVV000V, INSTR_VRR_VVV0000, + INSTR_VRR_VVV0MMM, INSTR_VRR_VVV00MM, INSTR_VRR_VVVMM0V, + INSTR_VRR_VVVM0MV, INSTR_VRR_VVVM00V, INSTR_VRR_VRR0000, + INSTR_VRS_VVRDM, INSTR_VRS_VVRD0, INSTR_VRS_VRRDM, INSTR_VRS_VRRD0, + INSTR_VRS_RVRDM, + INSTR_VRV_VVRDM, INSTR_VRV_VWRDM, + INSTR_VRX_VRRDM, INSTR_VRX_VRRD0, }; static const struct s390_operand operands[] = @@ -168,6 +189,11 @@ static const struct s390_operand operands[] = [A_28] = { 4, 28, OPERAND_AR }, [C_8] = { 4, 8, OPERAND_CR }, [C_12] = { 4, 12, OPERAND_CR }, + [V_8] = { 4, 8, OPERAND_VR }, + [V_12] = { 4, 12, OPERAND_VR }, + [V_16] = { 4, 16, OPERAND_VR }, + [V_32] = { 4, 32, OPERAND_VR }, + [W_12] = { 4, 12, OPERAND_INDEX | OPERAND_VR }, [B_16] = { 4, 16, OPERAND_BASE | OPERAND_GPR }, [B_32] = { 4, 32, OPERAND_BASE | OPERAND_GPR }, [X_12] = { 4, 12, OPERAND_INDEX | OPERAND_GPR }, @@ -190,6 +216,11 @@ static const struct s390_operand operands[] = [U8_24] = { 8, 24, 0 }, [U8_32] = { 8, 32, 0 }, [J12_12] = { 12, 12, OPERAND_PCREL }, + [I8_8] = { 8, 8, OPERAND_SIGNED }, + [I8_16] = { 8, 16, OPERAND_SIGNED }, + [I8_24] = { 8, 24, OPERAND_SIGNED }, + [I8_32] = { 8, 32, OPERAND_SIGNED }, + [I16_32] = { 16, 32, OPERAND_SIGNED }, [I16_16] = { 16, 16, OPERAND_SIGNED }, [U16_16] = { 16, 16, 0 }, [U16_32] = { 16, 32, 0 }, @@ -202,6 +233,9 @@ static const struct s390_operand operands[] = [U32_16] = { 32, 16, 0 }, [M_16] = { 4, 16, 0 }, [M_20] = { 4, 20, 0 }, + [M_24] = { 4, 24, 0 }, + [M_28] = { 4, 28, 0 }, + [M_32] = { 4, 32, 0 }, [RO_28] = { 4, 28, OPERAND_GPR } }; @@ -283,6 +317,7 @@ static const unsigned char formats[][7] = { [INSTR_RS_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, [INSTR_RXE_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, [INSTR_RXE_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, + [INSTR_RXE_RRRDM] = { 0xff, R_8,D_20,X_12,B_16,M_32,0 }, [INSTR_RXF_FRRDF] = { 0xff, F_32,F_8,D_20,X_12,B_16,0 }, [INSTR_RXY_FRRD] = { 0xff, F_8,D20_20,X_12,B_16,0,0 }, [INSTR_RXY_RRRD] = { 0xff, R_8,D20_20,X_12,B_16,0,0 }, @@ -307,6 +342,37 @@ static const unsigned char formats[][7] = { [INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 }, [INSTR_S_00] = { 0xff, 0,0,0,0,0,0 }, [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 }, + [INSTR_VRI_V0IM] = { 0xff, V_8,I16_16,M_32,0,0,0 }, + [INSTR_VRI_V0I0] = { 0xff, V_8,I16_16,0,0,0,0 }, + [INSTR_VRI_V0IIM] = { 0xff, V_8,I8_16,I8_24,M_32,0,0 }, + [INSTR_VRI_VVIM] = { 0xff, V_8,I16_16,V_12,M_32,0,0 }, + [INSTR_VRI_VVV0IM]= { 0xff, V_8,V_12,V_16,I8_24,M_32,0 }, + [INSTR_VRI_VVV0I0]= { 0xff, V_8,V_12,V_16,I8_24,0,0 }, + [INSTR_VRI_VVIMM] = { 0xff, V_8,V_12,I16_16,M_32,M_28,0 }, + [INSTR_VRR_VV00MMM]={ 0xff, V_8,V_12,M_32,M_28,M_24,0 }, + [INSTR_VRR_VV000MM]={ 0xff, V_8,V_12,M_32,M_28,0,0 }, + [INSTR_VRR_VV0000M]={ 0xff, V_8,V_12,M_32,0,0,0 }, + [INSTR_VRR_VV00000]={ 0xff, V_8,V_12,0,0,0,0 }, + [INSTR_VRR_VVV0M0M]={ 0xff, V_8,V_12,V_16,M_32,M_24,0 }, + [INSTR_VRR_VV00M0M]={ 0xff, V_8,V_12,M_32,M_24,0,0 }, + [INSTR_VRR_VVV000M]={ 0xff, V_8,V_12,V_16,M_32,0,0 }, + [INSTR_VRR_VVV000V]={ 0xff, V_8,V_12,V_16,V_32,0,0 }, + [INSTR_VRR_VVV0000]={ 0xff, V_8,V_12,V_16,0,0,0 }, + [INSTR_VRR_VVV0MMM]={ 0xff, V_8,V_12,V_16,M_32,M_28,M_24 }, + [INSTR_VRR_VVV00MM]={ 0xff, V_8,V_12,V_16,M_32,M_28,0 }, + [INSTR_VRR_VVVMM0V]={ 0xff, V_8,V_12,V_16,V_32,M_20,M_24 }, + [INSTR_VRR_VVVM0MV]={ 0xff, V_8,V_12,V_16,V_32,M_28,M_20 }, + [INSTR_VRR_VVVM00V]={ 0xff, V_8,V_12,V_16,V_32,M_20,0 }, + [INSTR_VRR_VRR0000]={ 0xff, V_8,R_12,R_16,0,0,0 }, + [INSTR_VRS_VVRDM] = { 0xff, V_8,V_12,D_20,B_16,M_32,0 }, + [INSTR_VRS_VVRD0] = { 0xff, V_8,V_12,D_20,B_16,0,0 }, + [INSTR_VRS_VRRDM] = { 0xff, V_8,R_12,D_20,B_16,M_32,0 }, + [INSTR_VRS_VRRD0] = { 0xff, V_8,R_12,D_20,B_16,0,0 }, + [INSTR_VRS_RVRDM] = { 0xff, R_8,V_12,D_20,B_16,M_32,0 }, + [INSTR_VRV_VVRDM] = { 0xff, V_8,V_12,D_20,B_16,M_32,0 }, + [INSTR_VRV_VWRDM] = { 0xff, V_8,D_20,W_12,B_16,M_32,0 }, + [INSTR_VRX_VRRDM] = { 0xff, V_8,D_20,X_12,B_16,M_32,0 }, + [INSTR_VRX_VRRD0] = { 0xff, V_8,D_20,X_12,B_16,0,0 }, }; enum { @@ -381,6 +447,11 @@ enum { LONG_INSN_MPCIFC, LONG_INSN_STPCIFC, LONG_INSN_PCISTB, + LONG_INSN_VPOPCT, + LONG_INSN_VERLLV, + LONG_INSN_VESRAV, + LONG_INSN_VESRLV, + LONG_INSN_VSBCBI }; static char *long_insn_name[] = { @@ -455,6 +526,11 @@ static char *long_insn_name[] = { [LONG_INSN_MPCIFC] = "mpcifc", [LONG_INSN_STPCIFC] = "stpcifc", [LONG_INSN_PCISTB] = "pcistb", + [LONG_INSN_VPOPCT] = "vpopct", + [LONG_INSN_VERLLV] = "verllv", + [LONG_INSN_VESRAV] = "vesrav", + [LONG_INSN_VESRLV] = "vesrlv", + [LONG_INSN_VSBCBI] = "vsbcbi", }; static struct s390_insn opcode[] = { @@ -1369,6 +1445,150 @@ static struct s390_insn opcode_e5[] = { { "", 0, INSTR_INVALID } }; +static struct s390_insn opcode_e7[] = { +#ifdef CONFIG_64BIT + { "lcbb", 0x27, INSTR_RXE_RRRDM }, + { "vgef", 0x13, INSTR_VRV_VVRDM }, + { "vgeg", 0x12, INSTR_VRV_VVRDM }, + { "vgbm", 0x44, INSTR_VRI_V0I0 }, + { "vgm", 0x46, INSTR_VRI_V0IIM }, + { "vl", 0x06, INSTR_VRX_VRRD0 }, + { "vlr", 0x56, INSTR_VRR_VV00000 }, + { "vlrp", 0x05, INSTR_VRX_VRRDM }, + { "vleb", 0x00, INSTR_VRX_VRRDM }, + { "vleh", 0x01, INSTR_VRX_VRRDM }, + { "vlef", 0x03, INSTR_VRX_VRRDM }, + { "vleg", 0x02, INSTR_VRX_VRRDM }, + { "vleib", 0x40, INSTR_VRI_V0IM }, + { "vleih", 0x41, INSTR_VRI_V0IM }, + { "vleif", 0x43, INSTR_VRI_V0IM }, + { "vleig", 0x42, INSTR_VRI_V0IM }, + { "vlgv", 0x21, INSTR_VRS_RVRDM }, + { "vllez", 0x04, INSTR_VRX_VRRDM }, + { "vlm", 0x36, INSTR_VRS_VVRD0 }, + { "vlbb", 0x07, INSTR_VRX_VRRDM }, + { "vlvg", 0x22, INSTR_VRS_VRRDM }, + { "vlvgp", 0x62, INSTR_VRR_VRR0000 }, + { "vll", 0x37, INSTR_VRS_VRRD0 }, + { "vmrh", 0x61, INSTR_VRR_VVV000M }, + { "vmrl", 0x60, INSTR_VRR_VVV000M }, + { "vpk", 0x94, INSTR_VRR_VVV000M }, + { "vpks", 0x97, INSTR_VRR_VVV0M0M }, + { "vpkls", 0x95, INSTR_VRR_VVV0M0M }, + { "vperm", 0x8c, INSTR_VRR_VVV000V }, + { "vpdi", 0x84, INSTR_VRR_VVV000M }, + { "vrep", 0x4d, INSTR_VRI_VVIM }, + { "vrepi", 0x45, INSTR_VRI_V0IM }, + { "vscef", 0x1b, INSTR_VRV_VWRDM }, + { "vsceg", 0x1a, INSTR_VRV_VWRDM }, + { "vsel", 0x8d, INSTR_VRR_VVV000V }, + { "vseg", 0x5f, INSTR_VRR_VV0000M }, + { "vst", 0x0e, INSTR_VRX_VRRD0 }, + { "vsteb", 0x08, INSTR_VRX_VRRDM }, + { "vsteh", 0x09, INSTR_VRX_VRRDM }, + { "vstef", 0x0b, INSTR_VRX_VRRDM }, + { "vsteg", 0x0a, INSTR_VRX_VRRDM }, + { "vstm", 0x3e, INSTR_VRS_VVRD0 }, + { "vstl", 0x3f, INSTR_VRS_VRRD0 }, + { "vuph", 0xd7, INSTR_VRR_VV0000M }, + { "vuplh", 0xd5, INSTR_VRR_VV0000M }, + { "vupl", 0xd6, INSTR_VRR_VV0000M }, + { "vupll", 0xd4, INSTR_VRR_VV0000M }, + { "va", 0xf3, INSTR_VRR_VVV000M }, + { "vacc", 0xf1, INSTR_VRR_VVV000M }, + { "vac", 0xbb, INSTR_VRR_VVVM00V }, + { "vaccc", 0xb9, INSTR_VRR_VVVM00V }, + { "vn", 0x68, INSTR_VRR_VVV0000 }, + { "vnc", 0x69, INSTR_VRR_VVV0000 }, + { "vavg", 0xf2, INSTR_VRR_VVV000M }, + { "vavgl", 0xf0, INSTR_VRR_VVV000M }, + { "vcksm", 0x66, INSTR_VRR_VVV0000 }, + { "vec", 0xdb, INSTR_VRR_VV0000M }, + { "vecl", 0xd9, INSTR_VRR_VV0000M }, + { "vceq", 0xf8, INSTR_VRR_VVV0M0M }, + { "vch", 0xfb, INSTR_VRR_VVV0M0M }, + { "vchl", 0xf9, INSTR_VRR_VVV0M0M }, + { "vclz", 0x53, INSTR_VRR_VV0000M }, + { "vctz", 0x52, INSTR_VRR_VV0000M }, + { "vx", 0x6d, INSTR_VRR_VVV0000 }, + { "vgfm", 0xb4, INSTR_VRR_VVV000M }, + { "vgfma", 0xbc, INSTR_VRR_VVVM00V }, + { "vlc", 0xde, INSTR_VRR_VV0000M }, + { "vlp", 0xdf, INSTR_VRR_VV0000M }, + { "vmx", 0xff, INSTR_VRR_VVV000M }, + { "vmxl", 0xfd, INSTR_VRR_VVV000M }, + { "vmn", 0xfe, INSTR_VRR_VVV000M }, + { "vmnl", 0xfc, INSTR_VRR_VVV000M }, + { "vmal", 0xaa, INSTR_VRR_VVVM00V }, + { "vmae", 0xae, INSTR_VRR_VVVM00V }, + { "vmale", 0xac, INSTR_VRR_VVVM00V }, + { "vmah", 0xab, INSTR_VRR_VVVM00V }, + { "vmalh", 0xa9, INSTR_VRR_VVVM00V }, + { "vmao", 0xaf, INSTR_VRR_VVVM00V }, + { "vmalo", 0xad, INSTR_VRR_VVVM00V }, + { "vmh", 0xa3, INSTR_VRR_VVV000M }, + { "vmlh", 0xa1, INSTR_VRR_VVV000M }, + { "vml", 0xa2, INSTR_VRR_VVV000M }, + { "vme", 0xa6, INSTR_VRR_VVV000M }, + { "vmle", 0xa4, INSTR_VRR_VVV000M }, + { "vmo", 0xa7, INSTR_VRR_VVV000M }, + { "vmlo", 0xa5, INSTR_VRR_VVV000M }, + { "vno", 0x6b, INSTR_VRR_VVV0000 }, + { "vo", 0x6a, INSTR_VRR_VVV0000 }, + { { 0, LONG_INSN_VPOPCT }, 0x50, INSTR_VRR_VV0000M }, + { { 0, LONG_INSN_VERLLV }, 0x73, INSTR_VRR_VVV000M }, + { "verll", 0x33, INSTR_VRS_VVRDM }, + { "verim", 0x72, INSTR_VRI_VVV0IM }, + { "veslv", 0x70, INSTR_VRR_VVV000M }, + { "vesl", 0x30, INSTR_VRS_VVRDM }, + { { 0, LONG_INSN_VESRAV }, 0x7a, INSTR_VRR_VVV000M }, + { "vesra", 0x3a, INSTR_VRS_VVRDM }, + { { 0, LONG_INSN_VESRLV }, 0x78, INSTR_VRR_VVV000M }, + { "vesrl", 0x38, INSTR_VRS_VVRDM }, + { "vsl", 0x74, INSTR_VRR_VVV0000 }, + { "vslb", 0x75, INSTR_VRR_VVV0000 }, + { "vsldb", 0x77, INSTR_VRI_VVV0I0 }, + { "vsra", 0x7e, INSTR_VRR_VVV0000 }, + { "vsrab", 0x7f, INSTR_VRR_VVV0000 }, + { "vsrl", 0x7c, INSTR_VRR_VVV0000 }, + { "vsrlb", 0x7d, INSTR_VRR_VVV0000 }, + { "vs", 0xf7, INSTR_VRR_VVV000M }, + { "vscb", 0xf5, INSTR_VRR_VVV000M }, + { "vsb", 0xbf, INSTR_VRR_VVVM00V }, + { { 0, LONG_INSN_VSBCBI }, 0xbd, INSTR_VRR_VVVM00V }, + { "vsumg", 0x65, INSTR_VRR_VVV000M }, + { "vsumq", 0x67, INSTR_VRR_VVV000M }, + { "vsum", 0x64, INSTR_VRR_VVV000M }, + { "vtm", 0xd8, INSTR_VRR_VV00000 }, + { "vfae", 0x82, INSTR_VRR_VVV0M0M }, + { "vfee", 0x80, INSTR_VRR_VVV0M0M }, + { "vfene", 0x81, INSTR_VRR_VVV0M0M }, + { "vistr", 0x5c, INSTR_VRR_VV00M0M }, + { "vstrc", 0x8a, INSTR_VRR_VVVMM0V }, + { "vfa", 0xe3, INSTR_VRR_VVV00MM }, + { "wfc", 0xcb, INSTR_VRR_VV000MM }, + { "wfk", 0xca, INSTR_VRR_VV000MM }, + { "vfce", 0xe8, INSTR_VRR_VVV0MMM }, + { "vfch", 0xeb, INSTR_VRR_VVV0MMM }, + { "vfche", 0xea, INSTR_VRR_VVV0MMM }, + { "vcdg", 0xc3, INSTR_VRR_VV00MMM }, + { "vcdlg", 0xc1, INSTR_VRR_VV00MMM }, + { "vcgd", 0xc2, INSTR_VRR_VV00MMM }, + { "vclgd", 0xc0, INSTR_VRR_VV00MMM }, + { "vfd", 0xe5, INSTR_VRR_VVV00MM }, + { "vfi", 0xc7, INSTR_VRR_VV00MMM }, + { "vlde", 0xc4, INSTR_VRR_VV000MM }, + { "vled", 0xc5, INSTR_VRR_VV00MMM }, + { "vfm", 0xe7, INSTR_VRR_VVV00MM }, + { "vfma", 0x8f, INSTR_VRR_VVVM0MV }, + { "vfms", 0x8e, INSTR_VRR_VVVM0MV }, + { "vfpso", 0xcc, INSTR_VRR_VV00MMM }, + { "vfsq", 0xce, INSTR_VRR_VV000MM }, + { "vfs", 0xe2, INSTR_VRR_VVV00MM }, + { "vftci", 0x4a, INSTR_VRI_VVIMM }, +#endif +}; + static struct s390_insn opcode_eb[] = { #ifdef CONFIG_64BIT { "lmg", 0x04, INSTR_RSY_RRRD }, @@ -1552,16 +1772,17 @@ static struct s390_insn opcode_ed[] = { static unsigned int extract_operand(unsigned char *code, const struct s390_operand *operand) { + unsigned char *cp; unsigned int val; int bits; /* Extract fragments of the operand byte for byte. */ - code += operand->shift / 8; + cp = code + operand->shift / 8; bits = (operand->shift & 7) + operand->bits; val = 0; do { val <<= 8; - val |= (unsigned int) *code++; + val |= (unsigned int) *cp++; bits -= 8; } while (bits > 0); val >>= -bits; @@ -1571,6 +1792,18 @@ static unsigned int extract_operand(unsigned char *code, if (operand->bits == 20 && operand->shift == 20) val = (val & 0xff) << 12 | (val & 0xfff00) >> 8; + /* Check for register extensions bits for vector registers. */ + if (operand->flags & OPERAND_VR) { + if (operand->shift == 8) + val |= (code[4] & 8) << 1; + else if (operand->shift == 12) + val |= (code[4] & 4) << 2; + else if (operand->shift == 16) + val |= (code[4] & 2) << 3; + else if (operand->shift == 32) + val |= (code[4] & 1) << 4; + } + /* Sign extend value if the operand is signed or pc relative. */ if ((operand->flags & (OPERAND_SIGNED | OPERAND_PCREL)) && (val & (1U << (operand->bits - 1)))) @@ -1639,6 +1872,10 @@ struct s390_insn *find_insn(unsigned char *code) case 0xe5: table = opcode_e5; break; + case 0xe7: + table = opcode_e7; + opfrag = code[5]; + break; case 0xeb: table = opcode_eb; opfrag = code[5]; @@ -1734,6 +1971,8 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr += sprintf(ptr, "%%a%i", value); else if (operand->flags & OPERAND_CR) ptr += sprintf(ptr, "%%c%i", value); + else if (operand->flags & OPERAND_VR) + ptr += sprintf(ptr, "%%v%i", value); else if (operand->flags & OPERAND_PCREL) ptr += sprintf(ptr, "%lx", (signed int) value + addr); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 0dff972a169c..cef2879edff3 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -390,10 +390,10 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_LPP; if (test_facility(50) && test_facility(73)) S390_lowcore.machine_flags |= MACHINE_FLAG_TE; - if (test_facility(66)) - S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM; if (test_facility(51)) S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; + if (test_facility(129)) + S390_lowcore.machine_flags |= MACHINE_FLAG_VX; #endif } diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 1aad48398d06..0554b9771c9f 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -4,7 +4,7 @@ #include <linux/types.h> #include <linux/signal.h> #include <asm/ptrace.h> -#include <asm/cputime.h> +#include <asm/idle.h> extern void *restart_stack; extern unsigned long suspend_zero_pages; @@ -21,6 +21,8 @@ void psw_idle(struct s390_idle_data *, unsigned long); asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); +int alloc_vector_registers(struct task_struct *tsk); + void do_protection_exception(struct pt_regs *regs); void do_dat_exception(struct pt_regs *regs); @@ -43,8 +45,10 @@ void special_op_exception(struct pt_regs *regs); void specification_exception(struct pt_regs *regs); void transaction_exception(struct pt_regs *regs); void translation_exception(struct pt_regs *regs); +void vector_exception(struct pt_regs *regs); void do_per_trap(struct pt_regs *regs); +void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); void syscall_trace(struct pt_regs *regs, int entryexit); void kernel_stack_overflow(struct pt_regs * regs); void do_signal(struct pt_regs *regs); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index f2e674c702e1..7b2e03afd017 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -42,7 +42,8 @@ STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE -_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) +_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ + _TIF_UPROBE) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) _CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE) @@ -265,6 +266,10 @@ sysc_work: jo sysc_mcck_pending tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jo sysc_reschedule +#ifdef CONFIG_UPROBES + tm __TI_flags+7(%r12),_TIF_UPROBE + jo sysc_uprobe_notify +#endif tm __PT_FLAGS+7(%r11),_PIF_PER_TRAP jo sysc_singlestep tm __TI_flags+7(%r12),_TIF_SIGPENDING @@ -323,6 +328,16 @@ sysc_notify_resume: jg do_notify_resume # +# _TIF_UPROBE is set, call uprobe_notify_resume +# +#ifdef CONFIG_UPROBES +sysc_uprobe_notify: + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return + jg uprobe_notify_resume +#endif + +# # _PIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 54d6493c4a56..51d14fe5eb9a 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -1,7 +1,7 @@ /* * Dynamic function tracer architecture backend. * - * Copyright IBM Corp. 2009 + * Copyright IBM Corp. 2009,2014 * * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, * Martin Schwidefsky <schwidefsky@de.ibm.com> @@ -17,100 +17,76 @@ #include <asm/asm-offsets.h> #include "entry.h" -#ifdef CONFIG_DYNAMIC_FTRACE - +void mcount_replace_code(void); void ftrace_disable_code(void); void ftrace_enable_insn(void); -#ifdef CONFIG_64BIT /* - * The 64-bit mcount code looks like this: + * The mcount code looks like this: * stg %r14,8(%r15) # offset 0 - * > larl %r1,<&counter> # offset 6 - * > brasl %r14,_mcount # offset 12 + * larl %r1,<&counter> # offset 6 + * brasl %r14,_mcount # offset 12 * lg %r14,8(%r15) # offset 18 - * Total length is 24 bytes. The middle two instructions of the mcount - * block get overwritten by ftrace_make_nop / ftrace_make_call. - * The 64-bit enabled ftrace code block looks like this: - * stg %r14,8(%r15) # offset 0 + * Total length is 24 bytes. The complete mcount block initially gets replaced + * by ftrace_make_nop. Subsequent calls to ftrace_make_call / ftrace_make_nop + * only patch the jg/lg instruction within the block. + * Note: we do not patch the first instruction to an unconditional branch, + * since that would break kprobes/jprobes. It is easier to leave the larl + * instruction in and only modify the second instruction. + * The enabled ftrace code block looks like this: + * larl %r0,.+24 # offset 0 * > lg %r1,__LC_FTRACE_FUNC # offset 6 - * > lgr %r0,%r0 # offset 12 - * > basr %r14,%r1 # offset 16 - * lg %r14,8(%15) # offset 18 - * The return points of the mcount/ftrace function have the same offset 18. - * The 64-bit disable ftrace code block looks like this: - * stg %r14,8(%r15) # offset 0 + * br %r1 # offset 12 + * brcl 0,0 # offset 14 + * brc 0,0 # offset 20 + * The ftrace function gets called with a non-standard C function call ABI + * where r0 contains the return address. It is also expected that the called + * function only clobbers r0 and r1, but restores r2-r15. + * The return point of the ftrace function has offset 24, so execution + * continues behind the mcount block. + * larl %r0,.+24 # offset 0 * > jg .+18 # offset 6 - * > lgr %r0,%r0 # offset 12 - * > basr %r14,%r1 # offset 16 - * lg %r14,8(%15) # offset 18 + * br %r1 # offset 12 + * brcl 0,0 # offset 14 + * brc 0,0 # offset 20 * The jg instruction branches to offset 24 to skip as many instructions * as possible. */ asm( " .align 4\n" + "mcount_replace_code:\n" + " larl %r0,0f\n" "ftrace_disable_code:\n" " jg 0f\n" - " lgr %r0,%r0\n" - " basr %r14,%r1\n" + " br %r1\n" + " brcl 0,0\n" + " brc 0,0\n" "0:\n" " .align 4\n" "ftrace_enable_insn:\n" " lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n"); +#define MCOUNT_BLOCK_SIZE 24 +#define MCOUNT_INSN_OFFSET 6 #define FTRACE_INSN_SIZE 6 -#else /* CONFIG_64BIT */ -/* - * The 31-bit mcount code looks like this: - * st %r14,4(%r15) # offset 0 - * > bras %r1,0f # offset 4 - * > .long _mcount # offset 8 - * > .long <&counter> # offset 12 - * > 0: l %r14,0(%r1) # offset 16 - * > l %r1,4(%r1) # offset 20 - * basr %r14,%r14 # offset 24 - * l %r14,4(%r15) # offset 26 - * Total length is 30 bytes. The twenty bytes starting from offset 4 - * to offset 24 get overwritten by ftrace_make_nop / ftrace_make_call. - * The 31-bit enabled ftrace code block looks like this: - * st %r14,4(%r15) # offset 0 - * > l %r14,__LC_FTRACE_FUNC # offset 4 - * > j 0f # offset 8 - * > .fill 12,1,0x07 # offset 12 - * 0: basr %r14,%r14 # offset 24 - * l %r14,4(%r14) # offset 26 - * The return points of the mcount/ftrace function have the same offset 26. - * The 31-bit disabled ftrace code block looks like this: - * st %r14,4(%r15) # offset 0 - * > j .+26 # offset 4 - * > j 0f # offset 8 - * > .fill 12,1,0x07 # offset 12 - * 0: basr %r14,%r14 # offset 24 - * l %r14,4(%r14) # offset 26 - * The j instruction branches to offset 30 to skip as many instructions - * as possible. - */ -asm( - " .align 4\n" - "ftrace_disable_code:\n" - " j 1f\n" - " j 0f\n" - " .fill 12,1,0x07\n" - "0: basr %r14,%r14\n" - "1:\n" - " .align 4\n" - "ftrace_enable_insn:\n" - " l %r14,"__stringify(__LC_FTRACE_FUNC)"\n"); - -#define FTRACE_INSN_SIZE 4 - -#endif /* CONFIG_64BIT */ - +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + return 0; +} int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { + /* Initial replacement of the whole mcount block */ + if (addr == MCOUNT_ADDR) { + if (probe_kernel_write((void *) rec->ip - MCOUNT_INSN_OFFSET, + mcount_replace_code, + MCOUNT_BLOCK_SIZE)) + return -EPERM; + return 0; + } if (probe_kernel_write((void *) rec->ip, ftrace_disable_code, MCOUNT_INSN_SIZE)) return -EPERM; @@ -135,8 +111,6 @@ int __init ftrace_dyn_arch_init(void) return 0; } -#endif /* CONFIG_DYNAMIC_FTRACE */ - #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* * Hook the return address and push it in the stack of return addresses @@ -162,31 +136,26 @@ out: return parent; } -#ifdef CONFIG_DYNAMIC_FTRACE /* * Patch the kernel code at ftrace_graph_caller location. The instruction - * there is branch relative and save to prepare_ftrace_return. To disable - * the call to prepare_ftrace_return we patch the bras offset to point - * directly after the instructions. To enable the call we calculate - * the original offset to prepare_ftrace_return and put it back. + * there is branch relative on condition. To enable the ftrace graph code + * block, we simply patch the mask field of the instruction to zero and + * turn the instruction into a nop. + * To disable the ftrace graph code the mask field will be patched to + * all ones, which turns the instruction into an unconditional branch. */ int ftrace_enable_ftrace_graph_caller(void) { - unsigned short offset; + u8 op = 0x04; /* set mask field to zero */ - offset = ((void *) prepare_ftrace_return - - (void *) ftrace_graph_caller) / 2; - return probe_kernel_write((void *) ftrace_graph_caller + 2, - &offset, sizeof(offset)); + return probe_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op)); } int ftrace_disable_ftrace_graph_caller(void) { - static unsigned short offset = 0x0002; + u8 op = 0xf4; /* set mask field to all ones */ - return probe_kernel_write((void *) ftrace_graph_caller + 2, - &offset, sizeof(offset)); + return probe_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op)); } -#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index e88d35d74950..d62eee11f0b5 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -398,7 +398,7 @@ ENTRY(startup_kdump) xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST #ifndef CONFIG_MARCH_G5 # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10} - .insn s,0xb2b10000,__LC_STFL_FAC_LIST # store facility list + .insn s,0xb2b10000,0 # store facilities @ __LC_STFL_FAC_LIST tm __LC_STFL_FAC_LIST,0x01 # stfle available ? jz 0f la %r0,1 diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c new file mode 100644 index 000000000000..c846aee7372f --- /dev/null +++ b/arch/s390/kernel/idle.c @@ -0,0 +1,124 @@ +/* + * Idle functions for s390. + * + * Copyright IBM Corp. 2014 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/kprobes.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <asm/cputime.h> +#include <asm/nmi.h> +#include <asm/smp.h> +#include "entry.h" + +static DEFINE_PER_CPU(struct s390_idle_data, s390_idle); + +void __kprobes enabled_wait(void) +{ + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + unsigned long long idle_time; + unsigned long psw_mask; + + trace_hardirqs_on(); + + /* Wait for external, I/O or machine check interrupt. */ + psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + clear_cpu_flag(CIF_NOHZ_DELAY); + + /* Call the assembler magic in entry.S */ + psw_idle(idle, psw_mask); + + /* Account time spent with enabled wait psw loaded as idle time. */ + idle->sequence++; + smp_wmb(); + idle_time = idle->clock_idle_exit - idle->clock_idle_enter; + idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; + idle->idle_time += idle_time; + idle->idle_count++; + account_idle_time(idle_time); + smp_wmb(); + idle->sequence++; +} + +static ssize_t show_idle_count(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + unsigned long long idle_count; + unsigned int sequence; + + do { + sequence = ACCESS_ONCE(idle->sequence); + idle_count = ACCESS_ONCE(idle->idle_count); + if (ACCESS_ONCE(idle->clock_idle_enter)) + idle_count++; + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + return sprintf(buf, "%llu\n", idle_count); +} +DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); + +static ssize_t show_idle_time(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + unsigned long long now, idle_time, idle_enter, idle_exit; + unsigned int sequence; + + do { + now = get_tod_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_time = ACCESS_ONCE(idle->idle_time); + idle_enter = ACCESS_ONCE(idle->clock_idle_enter); + idle_exit = ACCESS_ONCE(idle->clock_idle_exit); + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; + return sprintf(buf, "%llu\n", idle_time >> 12); +} +DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); + +cputime64_t arch_cpu_idle_time(int cpu) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); + unsigned long long now, idle_enter, idle_exit; + unsigned int sequence; + + do { + now = get_tod_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_enter = ACCESS_ONCE(idle->clock_idle_enter); + idle_exit = ACCESS_ONCE(idle->clock_idle_exit); + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0; +} + +void arch_cpu_idle_enter(void) +{ + local_mcck_disable(); +} + +void arch_cpu_idle(void) +{ + if (!test_cpu_flag(CIF_MCCK_PENDING)) + /* Halt the cpu and keep track of cpu time accounting. */ + enabled_wait(); + local_irq_enable(); +} + +void arch_cpu_idle_exit(void) +{ + local_mcck_enable(); + if (test_cpu_flag(CIF_MCCK_PENDING)) + s390_handle_mcck(); +} + +void arch_cpu_idle_dead(void) +{ + cpu_die(); +} diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 8eb82443cfbd..1b8a38ab7861 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -70,6 +70,7 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { {.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, {.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, {.irq = IRQEXT_CMR, .name = "CMR", .desc = "[EXT] CPU-Measurement: RI"}, + {.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"}, {.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, {.irq = IRQIO_QAI, .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"}, {.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"}, @@ -258,7 +259,7 @@ static irqreturn_t do_ext_interrupt(int irq, void *dummy) ext_code = *(struct ext_code *) ®s->int_code; if (ext_code.code != EXT_IRQ_CLK_COMP) - __get_cpu_var(s390_idle).nohz_delay = 1; + set_cpu_flag(CIF_NOHZ_DELAY); index = ext_hash(ext_code.code); rcu_read_lock(); diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index bc71a7b95af5..27ae5433fe4d 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -58,161 +58,13 @@ struct kprobe_insn_cache kprobe_dmainsn_slots = { .insn_size = MAX_INSN_SIZE, }; -static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn) -{ - if (!is_known_insn((unsigned char *)insn)) - return -EINVAL; - switch (insn[0] >> 8) { - case 0x0c: /* bassm */ - case 0x0b: /* bsm */ - case 0x83: /* diag */ - case 0x44: /* ex */ - case 0xac: /* stnsm */ - case 0xad: /* stosm */ - return -EINVAL; - case 0xc6: - switch (insn[0] & 0x0f) { - case 0x00: /* exrl */ - return -EINVAL; - } - } - switch (insn[0]) { - case 0x0101: /* pr */ - case 0xb25a: /* bsa */ - case 0xb240: /* bakr */ - case 0xb258: /* bsg */ - case 0xb218: /* pc */ - case 0xb228: /* pt */ - case 0xb98d: /* epsw */ - return -EINVAL; - } - return 0; -} - -static int __kprobes get_fixup_type(kprobe_opcode_t *insn) -{ - /* default fixup method */ - int fixup = FIXUP_PSW_NORMAL; - - switch (insn[0] >> 8) { - case 0x05: /* balr */ - case 0x0d: /* basr */ - fixup = FIXUP_RETURN_REGISTER; - /* if r2 = 0, no branch will be taken */ - if ((insn[0] & 0x0f) == 0) - fixup |= FIXUP_BRANCH_NOT_TAKEN; - break; - case 0x06: /* bctr */ - case 0x07: /* bcr */ - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - case 0x45: /* bal */ - case 0x4d: /* bas */ - fixup = FIXUP_RETURN_REGISTER; - break; - case 0x47: /* bc */ - case 0x46: /* bct */ - case 0x86: /* bxh */ - case 0x87: /* bxle */ - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - case 0x82: /* lpsw */ - fixup = FIXUP_NOT_REQUIRED; - break; - case 0xb2: /* lpswe */ - if ((insn[0] & 0xff) == 0xb2) - fixup = FIXUP_NOT_REQUIRED; - break; - case 0xa7: /* bras */ - if ((insn[0] & 0x0f) == 0x05) - fixup |= FIXUP_RETURN_REGISTER; - break; - case 0xc0: - if ((insn[0] & 0x0f) == 0x05) /* brasl */ - fixup |= FIXUP_RETURN_REGISTER; - break; - case 0xeb: - switch (insn[2] & 0xff) { - case 0x44: /* bxhg */ - case 0x45: /* bxleg */ - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - } - break; - case 0xe3: /* bctg */ - if ((insn[2] & 0xff) == 0x46) - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - case 0xec: - switch (insn[2] & 0xff) { - case 0xe5: /* clgrb */ - case 0xe6: /* cgrb */ - case 0xf6: /* crb */ - case 0xf7: /* clrb */ - case 0xfc: /* cgib */ - case 0xfd: /* cglib */ - case 0xfe: /* cib */ - case 0xff: /* clib */ - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - } - break; - } - return fixup; -} - -static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn) -{ - /* Check if we have a RIL-b or RIL-c format instruction which - * we need to modify in order to avoid instruction emulation. */ - switch (insn[0] >> 8) { - case 0xc0: - if ((insn[0] & 0x0f) == 0x00) /* larl */ - return true; - break; - case 0xc4: - switch (insn[0] & 0x0f) { - case 0x02: /* llhrl */ - case 0x04: /* lghrl */ - case 0x05: /* lhrl */ - case 0x06: /* llghrl */ - case 0x07: /* sthrl */ - case 0x08: /* lgrl */ - case 0x0b: /* stgrl */ - case 0x0c: /* lgfrl */ - case 0x0d: /* lrl */ - case 0x0e: /* llgfrl */ - case 0x0f: /* strl */ - return true; - } - break; - case 0xc6: - switch (insn[0] & 0x0f) { - case 0x02: /* pfdrl */ - case 0x04: /* cghrl */ - case 0x05: /* chrl */ - case 0x06: /* clghrl */ - case 0x07: /* clhrl */ - case 0x08: /* cgrl */ - case 0x0a: /* clgrl */ - case 0x0c: /* cgfrl */ - case 0x0d: /* crl */ - case 0x0e: /* clgfrl */ - case 0x0f: /* clrl */ - return true; - } - break; - } - return false; -} - static void __kprobes copy_instruction(struct kprobe *p) { s64 disp, new_disp; u64 addr, new_addr; memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8)); - if (!is_insn_relative_long(p->ainsn.insn)) + if (!probe_is_insn_relative_long(p->ainsn.insn)) return; /* * For pc-relative instructions in RIL-b or RIL-c format patch the @@ -276,7 +128,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if ((unsigned long) p->addr & 0x01) return -EINVAL; /* Make sure the probe isn't going on a difficult instruction */ - if (is_prohibited_opcode(p->addr)) + if (probe_is_prohibited_opcode(p->addr)) return -EINVAL; if (s390_get_insn_slot(p)) return -ENOMEM; @@ -605,7 +457,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); unsigned long ip = regs->psw.addr & PSW_ADDR_INSN; - int fixup = get_fixup_type(p->ainsn.insn); + int fixup = probe_get_fixup_type(p->ainsn.insn); if (fixup & FIXUP_PSW_NORMAL) ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn; @@ -789,11 +641,6 @@ void __kprobes jprobe_return(void) asm volatile(".word 0x0002"); } -static void __used __kprobes jprobe_return_end(void) -{ - asm volatile("bcr 0,0"); -} - int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 719e27b2cf22..4685337fa7c6 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -25,6 +25,7 @@ #include <asm/elf.h> #include <asm/asm-offsets.h> #include <asm/os_info.h> +#include <asm/switch_to.h> typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); @@ -43,7 +44,7 @@ static void add_elf_notes(int cpu) memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa)); ptr = (u64 *) per_cpu_ptr(crash_notes, cpu); - ptr = fill_cpu_elf_notes(ptr, sa); + ptr = fill_cpu_elf_notes(ptr, sa, NULL); memset(ptr, 0, sizeof(struct elf_note)); } @@ -53,8 +54,11 @@ static void add_elf_notes(int cpu) static void setup_regs(void) { unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; + struct _lowcore *lc; int cpu, this_cpu; + /* Get lowcore pointer from store status of this CPU (absolute zero) */ + lc = (struct _lowcore *)(unsigned long)S390_lowcore.prefixreg_save_area; this_cpu = smp_find_processor_id(stap()); add_elf_notes(this_cpu); for_each_online_cpu(cpu) { @@ -64,6 +68,8 @@ static void setup_regs(void) continue; add_elf_notes(cpu); } + if (MACHINE_HAS_VX) + save_vx_regs_safe((void *) lc->vector_save_area_addr); /* Copy dump CPU store status info to absolute zero */ memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); } diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 433c6dbfa442..4300ea374826 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -8,62 +8,72 @@ #include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/ftrace.h> +#include <asm/ptrace.h> .section .kprobes.text, "ax" ENTRY(ftrace_stub) br %r14 +#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) +#define STACK_PTREGS (STACK_FRAME_OVERHEAD) +#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) +#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) + ENTRY(_mcount) -#ifdef CONFIG_DYNAMIC_FTRACE br %r14 ENTRY(ftrace_caller) + .globl ftrace_regs_caller + .set ftrace_regs_caller,ftrace_caller + lgr %r1,%r15 + aghi %r15,-STACK_FRAME_SIZE + stg %r1,__SF_BACKCHAIN(%r15) + stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) + stg %r0,(STACK_PTREGS_PSW+8)(%r15) + stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + aghik %r2,%r0,-MCOUNT_INSN_SIZE + lgrl %r4,function_trace_op + lgrl %r1,ftrace_trace_function +#else + lgr %r2,%r0 + aghi %r2,-MCOUNT_INSN_SIZE + larl %r4,function_trace_op + lg %r4,0(%r4) + larl %r1,ftrace_trace_function + lg %r1,0(%r1) #endif - stm %r2,%r5,16(%r15) - bras %r1,1f -0: .long ftrace_trace_function -1: st %r14,56(%r15) - lr %r0,%r15 - ahi %r15,-96 - l %r3,100(%r15) - la %r2,0(%r14) - st %r0,__SF_BACKCHAIN(%r15) - la %r3,0(%r3) - ahi %r2,-MCOUNT_INSN_SIZE - l %r14,0b-0b(%r1) - l %r14,0(%r14) - basr %r14,%r14 + lgr %r3,%r14 + la %r5,STACK_PTREGS(%r15) + basr %r14,%r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER - l %r2,100(%r15) - l %r3,152(%r15) +# The j instruction gets runtime patched to a nop instruction. +# See ftrace_enable_ftrace_graph_caller. ENTRY(ftrace_graph_caller) -# The bras instruction gets runtime patched to call prepare_ftrace_return. -# See ftrace_enable_ftrace_graph_caller. The patched instruction is: -# bras %r14,prepare_ftrace_return - bras %r14,0f -0: st %r2,100(%r15) + j ftrace_graph_caller_end + lg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) + lg %r3,(STACK_PTREGS_PSW+8)(%r15) + brasl %r14,prepare_ftrace_return + stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) +ftrace_graph_caller_end: + .globl ftrace_graph_caller_end #endif - ahi %r15,96 - l %r14,56(%r15) - lm %r2,%r5,16(%r15) - br %r14 + lg %r1,(STACK_PTREGS_PSW+8)(%r15) + lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15) + br %r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(return_to_handler) - stm %r2,%r5,16(%r15) - st %r14,56(%r15) - lr %r0,%r15 - ahi %r15,-96 - st %r0,__SF_BACKCHAIN(%r15) - bras %r1,0f - .long ftrace_return_to_handler -0: l %r2,0b-0b(%r1) - basr %r14,%r2 - lr %r14,%r2 - ahi %r15,96 - lm %r2,%r5,16(%r15) + stmg %r2,%r5,32(%r15) + lgr %r1,%r15 + aghi %r15,-STACK_FRAME_OVERHEAD + stg %r1,__SF_BACKCHAIN(%r15) + brasl %r14,ftrace_return_to_handler + aghi %r15,STACK_FRAME_OVERHEAD + lgr %r14,%r2 + lmg %r2,%r5,32(%r15) br %r14 #endif diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S deleted file mode 100644 index c67a8bf0fd9a..000000000000 --- a/arch/s390/kernel/mcount64.S +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright IBM Corp. 2008, 2009 - * - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, - * - */ - -#include <linux/linkage.h> -#include <asm/asm-offsets.h> -#include <asm/ftrace.h> - - .section .kprobes.text, "ax" - -ENTRY(ftrace_stub) - br %r14 - -ENTRY(_mcount) -#ifdef CONFIG_DYNAMIC_FTRACE - br %r14 - -ENTRY(ftrace_caller) -#endif - stmg %r2,%r5,32(%r15) - stg %r14,112(%r15) - lgr %r1,%r15 - aghi %r15,-160 - stg %r1,__SF_BACKCHAIN(%r15) - lgr %r2,%r14 - lg %r3,168(%r15) - aghi %r2,-MCOUNT_INSN_SIZE - larl %r14,ftrace_trace_function - lg %r14,0(%r14) - basr %r14,%r14 -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - lg %r2,168(%r15) - lg %r3,272(%r15) -ENTRY(ftrace_graph_caller) -# The bras instruction gets runtime patched to call prepare_ftrace_return. -# See ftrace_enable_ftrace_graph_caller. The patched instruction is: -# bras %r14,prepare_ftrace_return - bras %r14,0f -0: stg %r2,168(%r15) -#endif - aghi %r15,160 - lmg %r2,%r5,32(%r15) - lg %r14,112(%r15) - br %r14 - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - -ENTRY(return_to_handler) - stmg %r2,%r5,32(%r15) - lgr %r1,%r15 - aghi %r15,-160 - stg %r1,__SF_BACKCHAIN(%r15) - brasl %r14,ftrace_return_to_handler - aghi %r15,160 - lgr %r14,%r2 - lmg %r2,%r5,32(%r15) - br %r14 - -#endif diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 210e1285f75a..db96b418160a 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -20,6 +20,7 @@ #include <asm/cputime.h> #include <asm/nmi.h> #include <asm/crw.h> +#include <asm/switch_to.h> struct mcck_struct { int kill_task; @@ -163,6 +164,21 @@ static int notrace s390_revalidate_registers(struct mci *mci) " ld 15,120(%0)\n" : : "a" (fpt_save_area)); } + +#ifdef CONFIG_64BIT + /* Revalidate vector registers */ + if (MACHINE_HAS_VX && current->thread.vxrs) { + if (!mci->vr) { + /* + * Vector registers can't be restored and therefore + * the process needs to be terminated. + */ + kill_task = 1; + } + restore_vx_regs((__vector128 *) + S390_lowcore.vector_save_area_addr); + } +#endif /* Revalidate access registers */ asm volatile( " lam 0,15,0(%0)" diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S index 813ec7260878..f6f8886399f6 100644 --- a/arch/s390/kernel/pgm_check.S +++ b/arch/s390/kernel/pgm_check.S @@ -49,7 +49,7 @@ PGM_CHECK_DEFAULT /* 17 */ PGM_CHECK_64BIT(transaction_exception) /* 18 */ PGM_CHECK_DEFAULT /* 19 */ PGM_CHECK_DEFAULT /* 1a */ -PGM_CHECK_DEFAULT /* 1b */ +PGM_CHECK_64BIT(vector_exception) /* 1b */ PGM_CHECK(space_switch_exception) /* 1c */ PGM_CHECK(hfp_sqrt_exception) /* 1d */ PGM_CHECK_DEFAULT /* 1e */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 93b9ca42e5c0..ed84cc224899 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -61,30 +61,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return sf->gprs[8]; } -void arch_cpu_idle(void) -{ - local_mcck_disable(); - if (test_cpu_flag(CIF_MCCK_PENDING)) { - local_mcck_enable(); - local_irq_enable(); - return; - } - /* Halt the cpu and keep track of cpu time accounting. */ - vtime_stop_cpu(); - local_irq_enable(); -} - -void arch_cpu_idle_exit(void) -{ - if (test_cpu_flag(CIF_MCCK_PENDING)) - s390_handle_mcck(); -} - -void arch_cpu_idle_dead(void) -{ - cpu_die(); -} - extern void __kprobes kernel_thread_starter(void); /* diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 24612029f450..edefead3b43a 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -23,7 +23,6 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id); */ void cpu_init(void) { - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); struct cpuid *id = &__get_cpu_var(cpu_id); get_cpu_id(id); @@ -31,7 +30,6 @@ void cpu_init(void) current->active_mm = &init_mm; BUG_ON(current->mm); enter_lazy_tlb(&init_mm, current); - memset(idle, 0, sizeof(*idle)); } /* @@ -41,7 +39,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) { static const char *hwcap_str[] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat", "etf3eh", "highgprs", "te" + "edat", "etf3eh", "highgprs", "te", "vx" }; unsigned long n = (unsigned long) v - 1; int i; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 5dc7ad9e2fbf..f537e937a988 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -38,15 +38,6 @@ #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> -enum s390_regset { - REGSET_GENERAL, - REGSET_FP, - REGSET_LAST_BREAK, - REGSET_TDB, - REGSET_SYSTEM_CALL, - REGSET_GENERAL_EXTENDED, -}; - void update_cr_regs(struct task_struct *task) { struct pt_regs *regs = task_pt_regs(task); @@ -55,27 +46,39 @@ void update_cr_regs(struct task_struct *task) #ifdef CONFIG_64BIT /* Take care of the enable/disable of transactional execution. */ - if (MACHINE_HAS_TE) { + if (MACHINE_HAS_TE || MACHINE_HAS_VX) { unsigned long cr, cr_new; __ctl_store(cr, 0, 0); - /* Set or clear transaction execution TXC bit 8. */ - cr_new = cr | (1UL << 55); - if (task->thread.per_flags & PER_FLAG_NO_TE) - cr_new &= ~(1UL << 55); + cr_new = cr; + if (MACHINE_HAS_TE) { + /* Set or clear transaction execution TXC bit 8. */ + cr_new |= (1UL << 55); + if (task->thread.per_flags & PER_FLAG_NO_TE) + cr_new &= ~(1UL << 55); + } + if (MACHINE_HAS_VX) { + /* Enable/disable of vector extension */ + cr_new &= ~(1UL << 17); + if (task->thread.vxrs) + cr_new |= (1UL << 17); + } if (cr_new != cr) __ctl_load(cr_new, 0, 0); - /* Set or clear transaction execution TDC bits 62 and 63. */ - __ctl_store(cr, 2, 2); - cr_new = cr & ~3UL; - if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { - if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) - cr_new |= 1UL; - else - cr_new |= 2UL; + if (MACHINE_HAS_TE) { + /* Set/clear transaction execution TDC bits 62/63. */ + __ctl_store(cr, 2, 2); + cr_new = cr & ~3UL; + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { + if (task->thread.per_flags & + PER_FLAG_TE_ABORT_RAND_TEND) + cr_new |= 1UL; + else + cr_new |= 2UL; + } + if (cr_new != cr) + __ctl_load(cr_new, 2, 2); } - if (cr_new != cr) - __ctl_load(cr_new, 2, 2); } #endif /* Copy user specified PER registers */ @@ -84,7 +87,8 @@ void update_cr_regs(struct task_struct *task) new.end = thread->per_user.end; /* merge TIF_SINGLE_STEP into user specified PER registers. */ - if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) { + if (test_tsk_thread_flag(task, TIF_SINGLE_STEP) || + test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) { if (test_tsk_thread_flag(task, TIF_BLOCK_STEP)) new.control |= PER_EVENT_BRANCH; else @@ -93,6 +97,8 @@ void update_cr_regs(struct task_struct *task) new.control |= PER_CONTROL_SUSPENSION; new.control |= PER_EVENT_TRANSACTION_END; #endif + if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) + new.control |= PER_EVENT_IFETCH; new.start = 0; new.end = PSW_ADDR_INSN; } @@ -803,7 +809,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) long ret = 0; /* Do the secure computing check first. */ - if (secure_computing(regs->gprs[2])) { + if (secure_computing()) { /* seccomp failures shouldn't expose any additional code. */ ret = -1; goto out; @@ -923,7 +929,15 @@ static int s390_fpregs_get(struct task_struct *target, save_fp_ctl(&target->thread.fp_regs.fpc); save_fp_regs(target->thread.fp_regs.fprs); } +#ifdef CONFIG_64BIT + else if (target->thread.vxrs) { + int i; + for (i = 0; i < __NUM_VXRS_LOW; i++) + target->thread.fp_regs.fprs[i] = + *(freg_t *)(target->thread.vxrs + i); + } +#endif return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.fp_regs, 0, -1); } @@ -957,9 +971,20 @@ static int s390_fpregs_set(struct task_struct *target, target->thread.fp_regs.fprs, offsetof(s390_fp_regs, fprs), -1); - if (rc == 0 && target == current) { - restore_fp_ctl(&target->thread.fp_regs.fpc); - restore_fp_regs(target->thread.fp_regs.fprs); + if (rc == 0) { + if (target == current) { + restore_fp_ctl(&target->thread.fp_regs.fpc); + restore_fp_regs(target->thread.fp_regs.fprs); + } +#ifdef CONFIG_64BIT + else if (target->thread.vxrs) { + int i; + + for (i = 0; i < __NUM_VXRS_LOW; i++) + *(freg_t *)(target->thread.vxrs + i) = + target->thread.fp_regs.fprs[i]; + } +#endif } return rc; @@ -1015,6 +1040,95 @@ static int s390_tdb_set(struct task_struct *target, return 0; } +static int s390_vxrs_active(struct task_struct *target, + const struct user_regset *regset) +{ + return !!target->thread.vxrs; +} + +static int s390_vxrs_low_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + if (target->thread.vxrs) { + if (target == current) + save_vx_regs(target->thread.vxrs); + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = *((__u64 *)(target->thread.vxrs + i) + 1); + } else + memset(vxrs, 0, sizeof(vxrs)); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); +} + +static int s390_vxrs_low_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + __u64 vxrs[__NUM_VXRS_LOW]; + int i, rc; + + if (!target->thread.vxrs) { + rc = alloc_vector_registers(target); + if (rc) + return rc; + } else if (target == current) + save_vx_regs(target->thread.vxrs); + + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); + if (rc == 0) { + for (i = 0; i < __NUM_VXRS_LOW; i++) + *((__u64 *)(target->thread.vxrs + i) + 1) = vxrs[i]; + if (target == current) + restore_vx_regs(target->thread.vxrs); + } + + return rc; +} + +static int s390_vxrs_high_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + __vector128 vxrs[__NUM_VXRS_HIGH]; + + if (target->thread.vxrs) { + if (target == current) + save_vx_regs(target->thread.vxrs); + memcpy(vxrs, target->thread.vxrs + __NUM_VXRS_LOW, + sizeof(vxrs)); + } else + memset(vxrs, 0, sizeof(vxrs)); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); +} + +static int s390_vxrs_high_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int rc; + + if (!target->thread.vxrs) { + rc = alloc_vector_registers(target); + if (rc) + return rc; + } else if (target == current) + save_vx_regs(target->thread.vxrs); + + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.vxrs + __NUM_VXRS_LOW, 0, -1); + if (rc == 0 && target == current) + restore_vx_regs(target->thread.vxrs); + + return rc; +} + #endif static int s390_system_call_get(struct task_struct *target, @@ -1038,7 +1152,7 @@ static int s390_system_call_set(struct task_struct *target, } static const struct user_regset s390_regsets[] = { - [REGSET_GENERAL] = { + { .core_note_type = NT_PRSTATUS, .n = sizeof(s390_regs) / sizeof(long), .size = sizeof(long), @@ -1046,7 +1160,7 @@ static const struct user_regset s390_regsets[] = { .get = s390_regs_get, .set = s390_regs_set, }, - [REGSET_FP] = { + { .core_note_type = NT_PRFPREG, .n = sizeof(s390_fp_regs) / sizeof(long), .size = sizeof(long), @@ -1054,8 +1168,16 @@ static const struct user_regset s390_regsets[] = { .get = s390_fpregs_get, .set = s390_fpregs_set, }, + { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(unsigned int), + .align = sizeof(unsigned int), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, #ifdef CONFIG_64BIT - [REGSET_LAST_BREAK] = { + { .core_note_type = NT_S390_LAST_BREAK, .n = 1, .size = sizeof(long), @@ -1063,7 +1185,7 @@ static const struct user_regset s390_regsets[] = { .get = s390_last_break_get, .set = s390_last_break_set, }, - [REGSET_TDB] = { + { .core_note_type = NT_S390_TDB, .n = 1, .size = 256, @@ -1071,15 +1193,25 @@ static const struct user_regset s390_regsets[] = { .get = s390_tdb_get, .set = s390_tdb_set, }, -#endif - [REGSET_SYSTEM_CALL] = { - .core_note_type = NT_S390_SYSTEM_CALL, - .n = 1, - .size = sizeof(unsigned int), - .align = sizeof(unsigned int), - .get = s390_system_call_get, - .set = s390_system_call_set, + { + .core_note_type = NT_S390_VXRS_LOW, + .n = __NUM_VXRS_LOW, + .size = sizeof(__u64), + .align = sizeof(__u64), + .active = s390_vxrs_active, + .get = s390_vxrs_low_get, + .set = s390_vxrs_low_set, }, + { + .core_note_type = NT_S390_VXRS_HIGH, + .n = __NUM_VXRS_HIGH, + .size = sizeof(__vector128), + .align = sizeof(__vector128), + .active = s390_vxrs_active, + .get = s390_vxrs_high_get, + .set = s390_vxrs_high_set, + }, +#endif }; static const struct user_regset_view user_s390_view = { @@ -1244,7 +1376,7 @@ static int s390_compat_last_break_set(struct task_struct *target, } static const struct user_regset s390_compat_regsets[] = { - [REGSET_GENERAL] = { + { .core_note_type = NT_PRSTATUS, .n = sizeof(s390_compat_regs) / sizeof(compat_long_t), .size = sizeof(compat_long_t), @@ -1252,7 +1384,7 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_compat_regs_get, .set = s390_compat_regs_set, }, - [REGSET_FP] = { + { .core_note_type = NT_PRFPREG, .n = sizeof(s390_fp_regs) / sizeof(compat_long_t), .size = sizeof(compat_long_t), @@ -1260,7 +1392,15 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_fpregs_get, .set = s390_fpregs_set, }, - [REGSET_LAST_BREAK] = { + { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(compat_uint_t), + .align = sizeof(compat_uint_t), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, + { .core_note_type = NT_S390_LAST_BREAK, .n = 1, .size = sizeof(long), @@ -1268,7 +1408,7 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_compat_last_break_get, .set = s390_compat_last_break_set, }, - [REGSET_TDB] = { + { .core_note_type = NT_S390_TDB, .n = 1, .size = 256, @@ -1276,15 +1416,25 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_tdb_get, .set = s390_tdb_set, }, - [REGSET_SYSTEM_CALL] = { - .core_note_type = NT_S390_SYSTEM_CALL, - .n = 1, - .size = sizeof(compat_uint_t), - .align = sizeof(compat_uint_t), - .get = s390_system_call_get, - .set = s390_system_call_set, + { + .core_note_type = NT_S390_VXRS_LOW, + .n = __NUM_VXRS_LOW, + .size = sizeof(__u64), + .align = sizeof(__u64), + .active = s390_vxrs_active, + .get = s390_vxrs_low_get, + .set = s390_vxrs_low_set, + }, + { + .core_note_type = NT_S390_VXRS_HIGH, + .n = __NUM_VXRS_HIGH, + .size = sizeof(__vector128), + .align = sizeof(__vector128), + .active = s390_vxrs_active, + .get = s390_vxrs_high_get, + .set = s390_vxrs_high_set, }, - [REGSET_GENERAL_EXTENDED] = { + { .core_note_type = NT_S390_HIGH_GPRS, .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), .size = sizeof(compat_long_t), diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 82bc113e8c1d..e80d9ff9a56d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -343,6 +343,9 @@ static void __init setup_lowcore(void) __ctl_set_bit(14, 29); } #else + if (MACHINE_HAS_VX) + lc->vector_save_area_addr = + (unsigned long) &lc->vector_save_area; lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; #endif lc->sync_enter_timer = S390_lowcore.sync_enter_timer; @@ -452,8 +455,8 @@ static void __init setup_memory_end(void) #ifdef CONFIG_64BIT vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN; tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; - tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size; - if (tmp <= (1UL << 42)) + tmp = tmp * (sizeof(struct page) + PAGE_SIZE); + if (tmp + vmalloc_size + MODULES_LEN <= (1UL << 42)) vmax = 1UL << 42; /* 3-level kernel page table */ else vmax = 1UL << 53; /* 4-level kernel page table */ @@ -765,6 +768,12 @@ static void __init setup_hwcaps(void) */ if (test_facility(50) && test_facility(73)) elf_hwcap |= HWCAP_S390_TE; + + /* + * Vector extension HWCAP_S390_VXRS is bit 11. + */ + if (test_facility(129)) + elf_hwcap |= HWCAP_S390_VXRS; #endif get_cpu_id(&cpu_id); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 469c4c6d9182..0c1a0ff0a558 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -31,30 +31,117 @@ #include <asm/switch_to.h> #include "entry.h" -typedef struct +/* + * Layout of an old-style signal-frame: + * ----------------------------------------- + * | save area (_SIGNAL_FRAMESIZE) | + * ----------------------------------------- + * | struct sigcontext | + * | oldmask | + * | _sigregs * | + * ----------------------------------------- + * | _sigregs with | + * | _s390_regs_common | + * | _s390_fp_regs | + * ----------------------------------------- + * | int signo | + * ----------------------------------------- + * | _sigregs_ext with | + * | gprs_high 64 byte (opt) | + * | vxrs_low 128 byte (opt) | + * | vxrs_high 256 byte (opt) | + * | reserved 128 byte (opt) | + * ----------------------------------------- + * | __u16 svc_insn | + * ----------------------------------------- + * The svc_insn entry with the sigreturn system call opcode does not + * have a fixed position and moves if gprs_high or vxrs exist. + * Future extensions will be added to _sigregs_ext. + */ +struct sigframe { __u8 callee_used_stack[__SIGNAL_FRAMESIZE]; struct sigcontext sc; _sigregs sregs; int signo; - __u8 retcode[S390_SYSCALL_SIZE]; -} sigframe; + _sigregs_ext sregs_ext; + __u16 svc_insn; /* Offset of svc_insn is NOT fixed! */ +}; -typedef struct +/* + * Layout of an rt signal-frame: + * ----------------------------------------- + * | save area (_SIGNAL_FRAMESIZE) | + * ----------------------------------------- + * | svc __NR_rt_sigreturn 2 byte | + * ----------------------------------------- + * | struct siginfo | + * ----------------------------------------- + * | struct ucontext_extended with | + * | unsigned long uc_flags | + * | struct ucontext *uc_link | + * | stack_t uc_stack | + * | _sigregs uc_mcontext with | + * | _s390_regs_common | + * | _s390_fp_regs | + * | sigset_t uc_sigmask | + * | _sigregs_ext uc_mcontext_ext | + * | gprs_high 64 byte (opt) | + * | vxrs_low 128 byte (opt) | + * | vxrs_high 256 byte (opt)| + * | reserved 128 byte (opt) | + * ----------------------------------------- + * Future extensions will be added to _sigregs_ext. + */ +struct rt_sigframe { __u8 callee_used_stack[__SIGNAL_FRAMESIZE]; - __u8 retcode[S390_SYSCALL_SIZE]; + __u16 svc_insn; struct siginfo info; - struct ucontext uc; -} rt_sigframe; + struct ucontext_extended uc; +}; + +/* Store registers needed to create the signal frame */ +static void store_sigregs(void) +{ + save_access_regs(current->thread.acrs); + save_fp_ctl(¤t->thread.fp_regs.fpc); +#ifdef CONFIG_64BIT + if (current->thread.vxrs) { + int i; + + save_vx_regs(current->thread.vxrs); + for (i = 0; i < __NUM_FPRS; i++) + current->thread.fp_regs.fprs[i] = + *(freg_t *)(current->thread.vxrs + i); + } else +#endif + save_fp_regs(current->thread.fp_regs.fprs); +} + +/* Load registers after signal return */ +static void load_sigregs(void) +{ + restore_access_regs(current->thread.acrs); + /* restore_fp_ctl is done in restore_sigregs */ +#ifdef CONFIG_64BIT + if (current->thread.vxrs) { + int i; + + for (i = 0; i < __NUM_FPRS; i++) + *(freg_t *)(current->thread.vxrs + i) = + current->thread.fp_regs.fprs[i]; + restore_vx_regs(current->thread.vxrs); + } else +#endif + restore_fp_regs(current->thread.fp_regs.fprs); +} /* Returns non-zero on fault. */ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { _sigregs user_sregs; - save_access_regs(current->thread.acrs); - /* Copy a 'clean' PSW mask to the user to avoid leaking information about whether PER is currently on. */ user_sregs.regs.psw.mask = PSW_USER_BITS | @@ -63,12 +150,6 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) memcpy(&user_sregs.regs.gprs, ®s->gprs, sizeof(sregs->regs.gprs)); memcpy(&user_sregs.regs.acrs, current->thread.acrs, sizeof(user_sregs.regs.acrs)); - /* - * We have to store the fp registers to current->thread.fp_regs - * to merge them with the emulated registers. - */ - save_fp_ctl(¤t->thread.fp_regs.fpc); - save_fp_regs(current->thread.fp_regs.fprs); memcpy(&user_sregs.fpregs, ¤t->thread.fp_regs, sizeof(user_sregs.fpregs)); if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs))) @@ -107,20 +188,64 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) memcpy(®s->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs)); memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, sizeof(current->thread.acrs)); - restore_access_regs(current->thread.acrs); memcpy(¤t->thread.fp_regs, &user_sregs.fpregs, sizeof(current->thread.fp_regs)); - restore_fp_regs(current->thread.fp_regs.fprs); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ return 0; } +/* Returns non-zero on fault. */ +static int save_sigregs_ext(struct pt_regs *regs, + _sigregs_ext __user *sregs_ext) +{ +#ifdef CONFIG_64BIT + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + /* Save vector registers to signal stack */ + if (current->thread.vxrs) { + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1); + if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, + sizeof(sregs_ext->vxrs_low)) || + __copy_to_user(&sregs_ext->vxrs_high, + current->thread.vxrs + __NUM_VXRS_LOW, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + } +#endif + return 0; +} + +static int restore_sigregs_ext(struct pt_regs *regs, + _sigregs_ext __user *sregs_ext) +{ +#ifdef CONFIG_64BIT + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + /* Restore vector registers from signal stack */ + if (current->thread.vxrs) { + if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, + sizeof(sregs_ext->vxrs_low)) || + __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW, + &sregs_ext->vxrs_high, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + for (i = 0; i < __NUM_VXRS_LOW; i++) + *((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i]; + } +#endif + return 0; +} + SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = task_pt_regs(current); - sigframe __user *frame = (sigframe __user *)regs->gprs[15]; + struct sigframe __user *frame = + (struct sigframe __user *) regs->gprs[15]; sigset_t set; if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE)) @@ -128,6 +253,9 @@ SYSCALL_DEFINE0(sigreturn) set_current_blocked(&set); if (restore_sigregs(regs, &frame->sregs)) goto badframe; + if (restore_sigregs_ext(regs, &frame->sregs_ext)) + goto badframe; + load_sigregs(); return regs->gprs[2]; badframe: force_sig(SIGSEGV, current); @@ -137,16 +265,20 @@ badframe: SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = task_pt_regs(current); - rt_sigframe __user *frame = (rt_sigframe __user *)regs->gprs[15]; + struct rt_sigframe __user *frame = + (struct rt_sigframe __user *)regs->gprs[15]; sigset_t set; if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; set_current_blocked(&set); + if (restore_altstack(&frame->uc.uc_stack)) + goto badframe; if (restore_sigregs(regs, &frame->uc.uc_mcontext)) goto badframe; - if (restore_altstack(&frame->uc.uc_stack)) + if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext)) goto badframe; + load_sigregs(); return regs->gprs[2]; badframe: force_sig(SIGSEGV, current); @@ -154,11 +286,6 @@ badframe: } /* - * Set up a signal frame. - */ - - -/* * Determine which stack to use.. */ static inline void __user * @@ -195,39 +322,63 @@ static inline int map_signal(int sig) static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs) { - sigframe __user *frame; - - frame = get_sigframe(ka, regs, sizeof(sigframe)); + struct sigframe __user *frame; + struct sigcontext sc; + unsigned long restorer; + size_t frame_size; + /* + * gprs_high are only present for a 31-bit task running on + * a 64-bit kernel (see compat_signal.c) but the space for + * gprs_high need to be allocated if vector registers are + * included in the signal frame on a 31-bit system. + */ + frame_size = sizeof(*frame) - sizeof(frame->sregs_ext); + if (MACHINE_HAS_VX) + frame_size += sizeof(frame->sregs_ext); + frame = get_sigframe(ka, regs, frame_size); if (frame == (void __user *) -1UL) return -EFAULT; - if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE)) + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (addr_t __user *) frame)) return -EFAULT; + /* Create struct sigcontext on the signal stack */ + memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE); + sc.sregs = (_sigregs __user __force *) &frame->sregs; + if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc))) + return -EFAULT; + + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create _sigregs on the signal stack */ if (save_sigregs(regs, &frame->sregs)) return -EFAULT; - if (__put_user(&frame->sregs, &frame->sc.sregs)) + + /* Place signal number on stack to allow backtrace from handler. */ + if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) + return -EFAULT; + + /* Create _sigregs_ext on the signal stack */ + if (save_sigregs_ext(regs, &frame->sregs_ext)) return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (unsigned long) - ka->sa.sa_restorer | PSW_ADDR_AMODE; + restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE; } else { - regs->gprs[14] = (unsigned long) - frame->retcode | PSW_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, - (u16 __user *)(frame->retcode))) + /* Signal frame without vector registers are short ! */ + __u16 __user *svc = (void *) frame + frame_size - 2; + if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc)) return -EFAULT; + restorer = (unsigned long) svc | PSW_ADDR_AMODE; } - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (addr_t __user *) frame)) - return -EFAULT; - /* Set up registers for signal handler */ + regs->gprs[14] = restorer; regs->gprs[15] = (unsigned long) frame; /* Force default amode and default user address space control. */ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | @@ -247,54 +398,69 @@ static int setup_frame(int sig, struct k_sigaction *ka, regs->gprs[5] = regs->int_parm_long; regs->gprs[6] = task_thread_info(current)->last_break; } - - /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) - return -EFAULT; return 0; } static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - int err = 0; - rt_sigframe __user *frame; - - frame = get_sigframe(&ksig->ka, regs, sizeof(rt_sigframe)); + struct rt_sigframe __user *frame; + unsigned long uc_flags, restorer; + size_t frame_size; + frame_size = sizeof(struct rt_sigframe) - sizeof(_sigregs_ext); + /* + * gprs_high are only present for a 31-bit task running on + * a 64-bit kernel (see compat_signal.c) but the space for + * gprs_high need to be allocated if vector registers are + * included in the signal frame on a 31-bit system. + */ + uc_flags = 0; +#ifdef CONFIG_64BIT + if (MACHINE_HAS_VX) { + frame_size += sizeof(_sigregs_ext); + if (current->thread.vxrs) + uc_flags |= UC_VXRS; + } +#endif + frame = get_sigframe(&ksig->ka, regs, frame_size); if (frame == (void __user *) -1UL) return -EFAULT; - if (copy_siginfo_to_user(&frame->info, &ksig->info)) - return -EFAULT; - - /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(NULL, &frame->uc.uc_link); - err |= __save_altstack(&frame->uc.uc_stack, regs->gprs[15]); - err |= save_sigregs(regs, &frame->uc.uc_mcontext); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (addr_t __user *) frame)) return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ksig->ka.sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (unsigned long) + restorer = (unsigned long) ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE; } else { - regs->gprs[14] = (unsigned long) - frame->retcode | PSW_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __user *)(frame->retcode))) + __u16 __user *svc = &frame->svc_insn; + if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc)) return -EFAULT; + restorer = (unsigned long) svc | PSW_ADDR_AMODE; } - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (addr_t __user *) frame)) + /* Create siginfo on the signal stack */ + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + return -EFAULT; + + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create ucontext on the signal stack. */ + if (__put_user(uc_flags, &frame->uc.uc_flags) || + __put_user(NULL, &frame->uc.uc_link) || + __save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || + save_sigregs(regs, &frame->uc.uc_mcontext) || + __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) || + save_sigregs_ext(regs, &frame->uc.uc_mcontext_ext)) return -EFAULT; /* Set up registers for signal handler */ + regs->gprs[14] = restorer; regs->gprs[15] = (unsigned long) frame; /* Force default amode and default user address space control. */ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 243c7e512600..6fd9e60101f1 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -45,6 +45,7 @@ #include <asm/debug.h> #include <asm/os_info.h> #include <asm/sigp.h> +#include <asm/idle.h> #include "entry.h" enum { @@ -82,7 +83,8 @@ DEFINE_MUTEX(smp_cpu_state_mutex); /* * Signal processor helper functions. */ -static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status) +static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm, + u32 *status) { int cc; @@ -178,6 +180,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) goto out; } #else + if (MACHINE_HAS_VX) + lc->vector_save_area_addr = + (unsigned long) &lc->vector_save_area; if (vdso_alloc_per_cpu(lc)) goto out; #endif @@ -333,12 +338,6 @@ int smp_vcpu_scheduled(int cpu) return pcpu_running(pcpu_devices + cpu); } -void smp_yield(void) -{ - if (MACHINE_HAS_DIAG44) - asm volatile("diag 0,0,0x44"); -} - void smp_yield_cpu(int cpu) { if (MACHINE_HAS_DIAG9C) @@ -517,35 +516,53 @@ EXPORT_SYMBOL(smp_ctl_clear_bit); static void __init smp_get_save_area(int cpu, u16 address) { void *lc = pcpu_devices[0].lowcore; - struct save_area *save_area; + struct save_area_ext *sa_ext; + unsigned long vx_sa; if (is_kdump_kernel()) return; if (!OLDMEM_BASE && (address == boot_cpu_address || ipl_info.type != IPL_TYPE_FCP_DUMP)) return; - save_area = dump_save_area_create(cpu); - if (!save_area) + sa_ext = dump_save_area_create(cpu); + if (!sa_ext) panic("could not allocate memory for save area\n"); if (address == boot_cpu_address) { /* Copy the registers of the boot cpu. */ - copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), + copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa), SAVE_AREA_BASE - PAGE_SIZE, 0); + if (MACHINE_HAS_VX) + save_vx_regs_safe(sa_ext->vx_regs); return; } /* Get the registers of a non-boot cpu. */ __pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL); - memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area)); + memcpy_real(&sa_ext->sa, lc + SAVE_AREA_BASE, sizeof(sa_ext->sa)); + if (!MACHINE_HAS_VX) + return; + /* Get the VX registers */ + vx_sa = __get_free_page(GFP_KERNEL); + if (!vx_sa) + panic("could not allocate memory for VX save area\n"); + __pcpu_sigp_relax(address, SIGP_STORE_ADDITIONAL_STATUS, vx_sa, NULL); + memcpy(sa_ext->vx_regs, (void *) vx_sa, sizeof(sa_ext->vx_regs)); + free_page(vx_sa); } int smp_store_status(int cpu) { + unsigned long vx_sa; struct pcpu *pcpu; pcpu = pcpu_devices + cpu; if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS, 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; + if (!MACHINE_HAS_VX) + return 0; + vx_sa = __pa(pcpu->lowcore->vector_save_area_addr); + __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, + vx_sa, NULL); return 0; } @@ -667,7 +684,7 @@ static void smp_start_secondary(void *cpuvoid) cpu_init(); preempt_disable(); init_cpu_timer(); - init_cpu_vtimer(); + vtime_init(); pfault_init(); notify_cpu_starting(smp_processor_id()); set_cpu_online(smp_processor_id(), true); @@ -726,6 +743,7 @@ int __cpu_disable(void) cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */ cregs[14] &= ~0x1f000000UL; /* disable most machine checks */ __ctl_load(cregs, 0, 15); + clear_cpu_flag(CIF_NOHZ_DELAY); return 0; } @@ -898,42 +916,6 @@ static struct attribute_group cpu_common_attr_group = { .attrs = cpu_common_attrs, }; -static ssize_t show_idle_count(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long idle_count; - unsigned int sequence; - - do { - sequence = ACCESS_ONCE(idle->sequence); - idle_count = ACCESS_ONCE(idle->idle_count); - if (ACCESS_ONCE(idle->clock_idle_enter)) - idle_count++; - } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); - return sprintf(buf, "%llu\n", idle_count); -} -static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); - -static ssize_t show_idle_time(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long now, idle_time, idle_enter, idle_exit; - unsigned int sequence; - - do { - now = get_tod_clock(); - sequence = ACCESS_ONCE(idle->sequence); - idle_time = ACCESS_ONCE(idle->idle_time); - idle_enter = ACCESS_ONCE(idle->clock_idle_enter); - idle_exit = ACCESS_ONCE(idle->clock_idle_exit); - } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); - idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; - return sprintf(buf, "%llu\n", idle_time >> 12); -} -static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); - static struct attribute *cpu_online_attrs[] = { &dev_attr_idle_count.attr, &dev_attr_idle_time_us.attr, diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 4cef607f3711..69e980de0f62 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -232,6 +232,19 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtom_clock_nsec -= nsecps; vdso_data->wtom_clock_sec++; } + + vdso_data->xtime_coarse_sec = tk->xtime_sec; + vdso_data->xtime_coarse_nsec = + (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); + vdso_data->wtom_coarse_sec = + vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec; + vdso_data->wtom_coarse_nsec = + vdso_data->xtime_coarse_nsec + tk->wall_to_monotonic.tv_nsec; + while (vdso_data->wtom_coarse_nsec >= NSEC_PER_SEC) { + vdso_data->wtom_coarse_nsec -= NSEC_PER_SEC; + vdso_data->wtom_coarse_sec++; + } + vdso_data->tk_mult = tk->tkr.mult; vdso_data->tk_shift = tk->tkr.shift; smp_wmb(); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 355a16c55702..b93bed76ea94 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -464,15 +464,17 @@ static struct sched_domain_topology_level s390_topology[] = { static int __init topology_init(void) { - if (!MACHINE_HAS_TOPOLOGY) { + if (MACHINE_HAS_TOPOLOGY) + set_topology_timer(); + else topology_update_polarization_simple(); - goto out; - } - set_topology_timer(); -out: - - set_sched_topology(s390_topology); - return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); } device_initcall(topology_init); + +static int __init early_topology_init(void) +{ + set_sched_topology(s390_topology); + return 0; +} +early_initcall(early_topology_init); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index c5762324d9ee..9ff5ecba26ab 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -18,6 +18,8 @@ #include <linux/ptrace.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/slab.h> +#include <asm/switch_to.h> #include "entry.h" int show_unhandled_signals = 1; @@ -58,15 +60,10 @@ int is_valid_bugaddr(unsigned long addr) return 1; } -static void __kprobes do_trap(struct pt_regs *regs, - int si_signo, int si_code, char *str) +void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) { siginfo_t info; - if (notify_die(DIE_TRAP, str, regs, 0, - regs->int_code, si_signo) == NOTIFY_STOP) - return; - if (user_mode(regs)) { info.si_signo = si_signo; info.si_errno = 0; @@ -90,6 +87,15 @@ static void __kprobes do_trap(struct pt_regs *regs, } } +static void __kprobes do_trap(struct pt_regs *regs, int si_signo, int si_code, + char *str) +{ + if (notify_die(DIE_TRAP, str, regs, 0, + regs->int_code, si_signo) == NOTIFY_STOP) + return; + do_report_trap(regs, si_signo, si_code, str); +} + void __kprobes do_per_trap(struct pt_regs *regs) { siginfo_t info; @@ -178,6 +184,7 @@ void __kprobes illegal_op(struct pt_regs *regs) siginfo_t info; __u8 opcode[6]; __u16 __user *location; + int is_uprobe_insn = 0; int signal = 0; location = get_trap_ip(regs); @@ -194,6 +201,10 @@ void __kprobes illegal_op(struct pt_regs *regs) force_sig_info(SIGTRAP, &info, current); } else signal = SIGILL; +#ifdef CONFIG_UPROBES + } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) { + is_uprobe_insn = 1; +#endif #ifdef CONFIG_MATHEMU } else if (opcode[0] == 0xb3) { if (get_user(*((__u16 *) (opcode+2)), location+1)) @@ -219,11 +230,13 @@ void __kprobes illegal_op(struct pt_regs *regs) #endif } else signal = SIGILL; - } else { - /* - * If we get an illegal op in kernel mode, send it through the - * kprobes notifier. If kprobes doesn't pick it up, SIGILL - */ + } + /* + * We got either an illegal op in kernel mode, or user space trapped + * on a uprobes illegal instruction. See if kprobes or uprobes picks + * it up. If not, SIGILL. + */ + if (is_uprobe_insn || !user_mode(regs)) { if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP) signal = SIGILL; @@ -292,6 +305,74 @@ DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception"); #endif +#ifdef CONFIG_64BIT +int alloc_vector_registers(struct task_struct *tsk) +{ + __vector128 *vxrs; + int i; + + /* Allocate vector register save area. */ + vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS, + GFP_KERNEL|__GFP_REPEAT); + if (!vxrs) + return -ENOMEM; + preempt_disable(); + if (tsk == current) + save_fp_regs(tsk->thread.fp_regs.fprs); + /* Copy the 16 floating point registers */ + for (i = 0; i < 16; i++) + *(freg_t *) &vxrs[i] = tsk->thread.fp_regs.fprs[i]; + tsk->thread.vxrs = vxrs; + if (tsk == current) { + __ctl_set_bit(0, 17); + restore_vx_regs(vxrs); + } + preempt_enable(); + return 0; +} + +void vector_exception(struct pt_regs *regs) +{ + int si_code, vic; + + if (!MACHINE_HAS_VX) { + do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation"); + return; + } + + /* get vector interrupt code from fpc */ + asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); + vic = (current->thread.fp_regs.fpc & 0xf00) >> 8; + switch (vic) { + case 1: /* invalid vector operation */ + si_code = FPE_FLTINV; + break; + case 2: /* division by zero */ + si_code = FPE_FLTDIV; + break; + case 3: /* overflow */ + si_code = FPE_FLTOVF; + break; + case 4: /* underflow */ + si_code = FPE_FLTUND; + break; + case 5: /* inexact */ + si_code = FPE_FLTRES; + break; + default: /* unknown cause */ + si_code = 0; + } + do_trap(regs, SIGFPE, si_code, "vector exception"); +} + +static int __init disable_vector_extension(char *str) +{ + S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; + return 1; +} +__setup("novx", disable_vector_extension); +#endif + void data_exception(struct pt_regs *regs) { __u16 __user *location; @@ -357,6 +438,18 @@ void data_exception(struct pt_regs *regs) } } #endif +#ifdef CONFIG_64BIT + /* Check for vector register enablement */ + if (MACHINE_HAS_VX && !current->thread.vxrs && + (current->thread.fp_regs.fpc & FPC_DXC_MASK) == 0xfe00) { + alloc_vector_registers(current); + /* Vector data exception is suppressing, rewind psw. */ + regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); + clear_pt_regs_flag(regs, PIF_PER_TRAP); + return; + } +#endif + if (current->thread.fp_regs.fpc & FPC_DXC_MASK) signal = SIGFPE; else diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c new file mode 100644 index 000000000000..956f4f7a591c --- /dev/null +++ b/arch/s390/kernel/uprobes.c @@ -0,0 +1,332 @@ +/* + * User-space Probes (UProbes) for s390 + * + * Copyright IBM Corp. 2014 + * Author(s): Jan Willeke, + */ + +#include <linux/kprobes.h> +#include <linux/uaccess.h> +#include <linux/uprobes.h> +#include <linux/compat.h> +#include <linux/kdebug.h> +#include <asm/switch_to.h> +#include <asm/facility.h> +#include <asm/dis.h> +#include "entry.h" + +#define UPROBE_TRAP_NR UINT_MAX + +int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, + unsigned long addr) +{ + return probe_is_prohibited_opcode(auprobe->insn); +} + +int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + if (psw_bits(regs->psw).eaba == PSW_AMODE_24BIT) + return -EINVAL; + if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_AMODE_31BIT) + return -EINVAL; + clear_pt_regs_flag(regs, PIF_PER_TRAP); + auprobe->saved_per = psw_bits(regs->psw).r; + auprobe->saved_int_code = regs->int_code; + regs->int_code = UPROBE_TRAP_NR; + regs->psw.addr = current->utask->xol_vaddr; + set_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP); + update_cr_regs(current); + return 0; +} + +bool arch_uprobe_xol_was_trapped(struct task_struct *tsk) +{ + struct pt_regs *regs = task_pt_regs(tsk); + + if (regs->int_code != UPROBE_TRAP_NR) + return true; + return false; +} + +int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + int fixup = probe_get_fixup_type(auprobe->insn); + struct uprobe_task *utask = current->utask; + + clear_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP); + update_cr_regs(current); + psw_bits(regs->psw).r = auprobe->saved_per; + regs->int_code = auprobe->saved_int_code; + + if (fixup & FIXUP_PSW_NORMAL) + regs->psw.addr += utask->vaddr - utask->xol_vaddr; + if (fixup & FIXUP_RETURN_REGISTER) { + int reg = (auprobe->insn[0] & 0xf0) >> 4; + + regs->gprs[reg] += utask->vaddr - utask->xol_vaddr; + } + if (fixup & FIXUP_BRANCH_NOT_TAKEN) { + int ilen = insn_length(auprobe->insn[0] >> 8); + + if (regs->psw.addr - utask->xol_vaddr == ilen) + regs->psw.addr = utask->vaddr + ilen; + } + /* If per tracing was active generate trap */ + if (regs->psw.mask & PSW_MASK_PER) + do_per_trap(regs); + return 0; +} + +int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, + void *data) +{ + struct die_args *args = data; + struct pt_regs *regs = args->regs; + + if (!user_mode(regs)) + return NOTIFY_DONE; + if (regs->int_code & 0x200) /* Trap during transaction */ + return NOTIFY_DONE; + switch (val) { + case DIE_BPT: + if (uprobe_pre_sstep_notifier(regs)) + return NOTIFY_STOP; + break; + case DIE_SSTEP: + if (uprobe_post_sstep_notifier(regs)) + return NOTIFY_STOP; + default: + break; + } + return NOTIFY_DONE; +} + +void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + clear_thread_flag(TIF_UPROBE_SINGLESTEP); + regs->int_code = auprobe->saved_int_code; + regs->psw.addr = current->utask->vaddr; +} + +unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline, + struct pt_regs *regs) +{ + unsigned long orig; + + orig = regs->gprs[14]; + regs->gprs[14] = trampoline; + return orig; +} + +/* Instruction Emulation */ + +static void adjust_psw_addr(psw_t *psw, unsigned long len) +{ + psw->addr = __rewind_psw(*psw, -len); +} + +#define EMU_ILLEGAL_OP 1 +#define EMU_SPECIFICATION 2 +#define EMU_ADDRESSING 3 + +#define emu_load_ril(ptr, output) \ +({ \ + unsigned int mask = sizeof(*(ptr)) - 1; \ + __typeof__(*(ptr)) input; \ + int __rc = 0; \ + \ + if (!test_facility(34)) \ + __rc = EMU_ILLEGAL_OP; \ + else if ((u64 __force)ptr & mask) \ + __rc = EMU_SPECIFICATION; \ + else if (get_user(input, ptr)) \ + __rc = EMU_ADDRESSING; \ + else \ + *(output) = input; \ + __rc; \ +}) + +#define emu_store_ril(ptr, input) \ +({ \ + unsigned int mask = sizeof(*(ptr)) - 1; \ + int __rc = 0; \ + \ + if (!test_facility(34)) \ + __rc = EMU_ILLEGAL_OP; \ + else if ((u64 __force)ptr & mask) \ + __rc = EMU_SPECIFICATION; \ + else if (put_user(*(input), ptr)) \ + __rc = EMU_ADDRESSING; \ + __rc; \ +}) + +#define emu_cmp_ril(regs, ptr, cmp) \ +({ \ + unsigned int mask = sizeof(*(ptr)) - 1; \ + __typeof__(*(ptr)) input; \ + int __rc = 0; \ + \ + if (!test_facility(34)) \ + __rc = EMU_ILLEGAL_OP; \ + else if ((u64 __force)ptr & mask) \ + __rc = EMU_SPECIFICATION; \ + else if (get_user(input, ptr)) \ + __rc = EMU_ADDRESSING; \ + else if (input > *(cmp)) \ + psw_bits((regs)->psw).cc = 1; \ + else if (input < *(cmp)) \ + psw_bits((regs)->psw).cc = 2; \ + else \ + psw_bits((regs)->psw).cc = 0; \ + __rc; \ +}) + +struct insn_ril { + u8 opc0; + u8 reg : 4; + u8 opc1 : 4; + s32 disp; +} __packed; + +union split_register { + u64 u64; + u32 u32[2]; + u16 u16[4]; + s64 s64; + s32 s32[2]; + s16 s16[4]; +}; + +/* + * pc relative instructions are emulated, since parameters may not be + * accessible from the xol area due to range limitations. + */ +static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + union split_register *rx; + struct insn_ril *insn; + unsigned int ilen; + void *uptr; + int rc = 0; + + insn = (struct insn_ril *) &auprobe->insn; + rx = (union split_register *) ®s->gprs[insn->reg]; + uptr = (void *)(regs->psw.addr + (insn->disp * 2)); + ilen = insn_length(insn->opc0); + + switch (insn->opc0) { + case 0xc0: + switch (insn->opc1) { + case 0x00: /* larl */ + rx->u64 = (unsigned long)uptr; + break; + } + break; + case 0xc4: + switch (insn->opc1) { + case 0x02: /* llhrl */ + rc = emu_load_ril((u16 __user *)uptr, &rx->u32[1]); + break; + case 0x04: /* lghrl */ + rc = emu_load_ril((s16 __user *)uptr, &rx->u64); + break; + case 0x05: /* lhrl */ + rc = emu_load_ril((s16 __user *)uptr, &rx->u32[1]); + break; + case 0x06: /* llghrl */ + rc = emu_load_ril((u16 __user *)uptr, &rx->u64); + break; + case 0x08: /* lgrl */ + rc = emu_load_ril((u64 __user *)uptr, &rx->u64); + break; + case 0x0c: /* lgfrl */ + rc = emu_load_ril((s32 __user *)uptr, &rx->u64); + break; + case 0x0d: /* lrl */ + rc = emu_load_ril((u32 __user *)uptr, &rx->u32[1]); + break; + case 0x0e: /* llgfrl */ + rc = emu_load_ril((u32 __user *)uptr, &rx->u64); + break; + case 0x07: /* sthrl */ + rc = emu_store_ril((u16 __user *)uptr, &rx->u16[3]); + break; + case 0x0b: /* stgrl */ + rc = emu_store_ril((u64 __user *)uptr, &rx->u64); + break; + case 0x0f: /* strl */ + rc = emu_store_ril((u32 __user *)uptr, &rx->u32[1]); + break; + } + break; + case 0xc6: + switch (insn->opc1) { + case 0x02: /* pfdrl */ + if (!test_facility(34)) + rc = EMU_ILLEGAL_OP; + break; + case 0x04: /* cghrl */ + rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s64); + break; + case 0x05: /* chrl */ + rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s32[1]); + break; + case 0x06: /* clghrl */ + rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u64); + break; + case 0x07: /* clhrl */ + rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u32[1]); + break; + case 0x08: /* cgrl */ + rc = emu_cmp_ril(regs, (s64 __user *)uptr, &rx->s64); + break; + case 0x0a: /* clgrl */ + rc = emu_cmp_ril(regs, (u64 __user *)uptr, &rx->u64); + break; + case 0x0c: /* cgfrl */ + rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s64); + break; + case 0x0d: /* crl */ + rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s32[1]); + break; + case 0x0e: /* clgfrl */ + rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u64); + break; + case 0x0f: /* clrl */ + rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u32[1]); + break; + } + break; + } + adjust_psw_addr(®s->psw, ilen); + switch (rc) { + case EMU_ILLEGAL_OP: + regs->int_code = ilen << 16 | 0x0001; + do_report_trap(regs, SIGILL, ILL_ILLOPC, NULL); + break; + case EMU_SPECIFICATION: + regs->int_code = ilen << 16 | 0x0006; + do_report_trap(regs, SIGILL, ILL_ILLOPC , NULL); + break; + case EMU_ADDRESSING: + regs->int_code = ilen << 16 | 0x0005; + do_report_trap(regs, SIGSEGV, SEGV_MAPERR, NULL); + break; + } +} + +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + if ((psw_bits(regs->psw).eaba == PSW_AMODE_24BIT) || + ((psw_bits(regs->psw).eaba == PSW_AMODE_31BIT) && + !is_compat_task())) { + regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE); + do_report_trap(regs, SIGILL, ILL_ILLADR, NULL); + return true; + } + if (probe_is_insn_relative_long(auprobe->insn)) { + handle_insn_ril(auprobe, regs); + return true; + } + return false; +} diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S index 36aaa25d05da..eca3f001f081 100644 --- a/arch/s390/kernel/vdso32/clock_getres.S +++ b/arch/s390/kernel/vdso32/clock_getres.S @@ -19,14 +19,20 @@ .type __kernel_clock_getres,@function __kernel_clock_getres: .cfi_startproc + basr %r1,0 + la %r1,4f-.(%r1) chi %r2,__CLOCK_REALTIME je 0f chi %r2,__CLOCK_MONOTONIC + je 0f + la %r1,5f-4f(%r1) + chi %r2,__CLOCK_REALTIME_COARSE + je 0f + chi %r2,__CLOCK_MONOTONIC_COARSE jne 3f 0: ltr %r3,%r3 jz 2f /* res == NULL */ - basr %r1,0 -1: l %r0,4f-1b(%r1) +1: l %r0,0(%r1) xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */ st %r0,4(%r3) /* store tp->tv_usec */ 2: lhi %r2,0 @@ -35,5 +41,6 @@ __kernel_clock_getres: svc 0 br %r14 4: .long __CLOCK_REALTIME_RES +5: .long __CLOCK_COARSE_RES .cfi_endproc .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S index 7cf18f8d4cb4..48c2206a3956 100644 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -21,8 +21,12 @@ __kernel_clock_gettime: .cfi_startproc basr %r5,0 0: al %r5,21f-0b(%r5) /* get &_vdso_data */ + chi %r2,__CLOCK_REALTIME_COARSE + je 10f chi %r2,__CLOCK_REALTIME je 11f + chi %r2,__CLOCK_MONOTONIC_COARSE + je 9f chi %r2,__CLOCK_MONOTONIC jne 19f @@ -30,8 +34,8 @@ __kernel_clock_gettime: 1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? loop */ jnz 1b - stck 24(%r15) /* Store TOD clock */ - lm %r0,%r1,24(%r15) + stcke 24(%r15) /* Store TOD clock */ + lm %r0,%r1,25(%r15) s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ sl %r1,__VDSO_XTIME_STAMP+4(%r5) brc 3,2f @@ -68,12 +72,32 @@ __kernel_clock_gettime: lhi %r2,0 br %r14 + /* CLOCK_MONOTONIC_COARSE */ +9: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 9b + l %r2,__VDSO_WTOM_CRS_SEC+4(%r5) + l %r1,__VDSO_WTOM_CRS_NSEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 9b + j 8b + + /* CLOCK_REALTIME_COARSE */ +10: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 10b + l %r2,__VDSO_XTIME_CRS_SEC+4(%r5) + l %r1,__VDSO_XTIME_CRS_NSEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 10b + j 17f + /* CLOCK_REALTIME */ 11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? loop */ jnz 11b - stck 24(%r15) /* Store TOD clock */ - lm %r0,%r1,24(%r15) + stcke 24(%r15) /* Store TOD clock */ + lm %r0,%r1,25(%r15) s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ sl %r1,__VDSO_XTIME_STAMP+4(%r5) brc 3,12f diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S index fd621a950f7c..60def5f562db 100644 --- a/arch/s390/kernel/vdso32/gettimeofday.S +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -29,8 +29,8 @@ __kernel_gettimeofday: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? loop */ jnz 1b - stck 24(%r15) /* Store TOD clock */ - lm %r0,%r1,24(%r15) + stcke 24(%r15) /* Store TOD clock */ + lm %r0,%r1,25(%r15) s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ sl %r1,__VDSO_XTIME_STAMP+4(%r5) brc 3,3f diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 34deba7c7ed1..c8513deb8c66 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -19,6 +19,12 @@ .type __kernel_clock_getres,@function __kernel_clock_getres: .cfi_startproc + larl %r1,4f + cghi %r2,__CLOCK_REALTIME_COARSE + je 0f + cghi %r2,__CLOCK_MONOTONIC_COARSE + je 0f + larl %r1,3f cghi %r2,__CLOCK_REALTIME je 0f cghi %r2,__CLOCK_MONOTONIC @@ -32,7 +38,6 @@ __kernel_clock_getres: jz 2f 0: ltgr %r3,%r3 jz 1f /* res == NULL */ - larl %r1,3f lg %r0,0(%r1) xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ stg %r0,8(%r3) /* store tp->tv_usec */ @@ -42,5 +47,6 @@ __kernel_clock_getres: svc 0 br %r14 3: .quad __CLOCK_REALTIME_RES +4: .quad __CLOCK_COARSE_RES .cfi_endproc .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 3f34e09db5f4..9d9761f8e110 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -20,12 +20,16 @@ __kernel_clock_gettime: .cfi_startproc larl %r5,_vdso_data + cghi %r2,__CLOCK_REALTIME_COARSE + je 4f cghi %r2,__CLOCK_REALTIME je 5f cghi %r2,__CLOCK_THREAD_CPUTIME_ID je 9f cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */ je 9f + cghi %r2,__CLOCK_MONOTONIC_COARSE + je 3f cghi %r2,__CLOCK_MONOTONIC jne 12f @@ -33,10 +37,10 @@ __kernel_clock_gettime: 0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? loop */ jnz 0b - stck 48(%r15) /* Store TOD clock */ + stcke 48(%r15) /* Store TOD clock */ lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ lg %r0,__VDSO_WTOM_SEC(%r5) - lg %r1,48(%r15) + lg %r1,49(%r15) sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ alg %r1,__VDSO_WTOM_NSEC(%r5) @@ -54,13 +58,33 @@ __kernel_clock_gettime: lghi %r2,0 br %r14 + /* CLOCK_MONOTONIC_COARSE */ +3: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 3b + lg %r0,__VDSO_WTOM_CRS_SEC(%r5) + lg %r1,__VDSO_WTOM_CRS_NSEC(%r5) + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 3b + j 2b + + /* CLOCK_REALTIME_COARSE */ +4: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 4b + lg %r0,__VDSO_XTIME_CRS_SEC(%r5) + lg %r1,__VDSO_XTIME_CRS_NSEC(%r5) + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 4b + j 7f + /* CLOCK_REALTIME */ 5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? loop */ jnz 5b - stck 48(%r15) /* Store TOD clock */ + stcke 48(%r15) /* Store TOD clock */ lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ - lg %r1,48(%r15) + lg %r1,49(%r15) sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S index d0860d1d0ccc..7a344995a97f 100644 --- a/arch/s390/kernel/vdso64/gettimeofday.S +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -28,8 +28,8 @@ __kernel_gettimeofday: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? loop */ jnz 0b - stck 48(%r15) /* Store TOD clock */ - lg %r1,48(%r15) + stcke 48(%r15) /* Store TOD clock */ + lg %r1,49(%r15) sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 8c34363d6f1e..416f2a323ba5 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -6,27 +6,18 @@ */ #include <linux/kernel_stat.h> -#include <linux/notifier.h> -#include <linux/kprobes.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/timex.h> #include <linux/types.h> #include <linux/time.h> -#include <linux/cpu.h> -#include <linux/smp.h> -#include <asm/irq_regs.h> #include <asm/cputime.h> #include <asm/vtimer.h> #include <asm/vtime.h> -#include <asm/irq.h> -#include "entry.h" static void virt_timer_expire(void); -DEFINE_PER_CPU(struct s390_idle_data, s390_idle); - static LIST_HEAD(virt_timer_list); static DEFINE_SPINLOCK(virt_timer_lock); static atomic64_t virt_timer_current; @@ -152,49 +143,6 @@ void vtime_account_system(struct task_struct *tsk) __attribute__((alias("vtime_account_irq_enter"))); EXPORT_SYMBOL_GPL(vtime_account_system); -void __kprobes vtime_stop_cpu(void) -{ - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); - unsigned long long idle_time; - unsigned long psw_mask; - - trace_hardirqs_on(); - - /* Wait for external, I/O or machine check interrupt. */ - psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; - idle->nohz_delay = 0; - - /* Call the assembler magic in entry.S */ - psw_idle(idle, psw_mask); - - /* Account time spent with enabled wait psw loaded as idle time. */ - idle->sequence++; - smp_wmb(); - idle_time = idle->clock_idle_exit - idle->clock_idle_enter; - idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; - idle->idle_time += idle_time; - idle->idle_count++; - account_idle_time(idle_time); - smp_wmb(); - idle->sequence++; -} - -cputime64_t s390_get_idle_time(int cpu) -{ - struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); - unsigned long long now, idle_enter, idle_exit; - unsigned int sequence; - - do { - now = get_tod_clock(); - sequence = ACCESS_ONCE(idle->sequence); - idle_enter = ACCESS_ONCE(idle->clock_idle_enter); - idle_exit = ACCESS_ONCE(idle->clock_idle_exit); - } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); - return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0; -} - /* * Sorted add to a list. List is linear searched until first bigger * element is found. @@ -372,31 +320,8 @@ EXPORT_SYMBOL(del_virt_timer); /* * Start the virtual CPU timer on the current CPU. */ -void init_cpu_vtimer(void) +void vtime_init(void) { /* set initial cpu timer */ set_vtimer(VTIMER_MAX_SLICE); } - -static int s390_nohz_notify(struct notifier_block *self, unsigned long action, - void *hcpu) -{ - struct s390_idle_data *idle; - long cpu = (long) hcpu; - - idle = &per_cpu(s390_idle, cpu); - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DYING: - idle->nohz_delay = 0; - default: - break; - } - return NOTIFY_OK; -} - -void __init vtime_init(void) -{ - /* Enable cpu timer interrupts on the boot cpu. */ - init_cpu_vtimer(); - cpu_notifier(s390_nohz_notify, 0); -} diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index c6d752e8bf28..a01df233856f 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -6,3 +6,5 @@ lib-y += delay.o string.o uaccess.o find.o obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o obj-$(CONFIG_64BIT) += mem64.o lib-$(CONFIG_SMP) += spinlock.o +lib-$(CONFIG_KPROBES) += probes.o +lib-$(CONFIG_UPROBES) += probes.o diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index a9f3d0042d58..16dc42d83f93 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -43,7 +43,7 @@ static void __udelay_disabled(unsigned long long usecs) lockdep_off(); do { set_clock_comparator(end); - vtime_stop_cpu(); + enabled_wait(); } while (get_tod_clock_fast() < end); lockdep_on(); __ctl_load(cr0, 0, 0); @@ -62,7 +62,7 @@ static void __udelay_enabled(unsigned long long usecs) clock_saved = local_tick_disable(); set_clock_comparator(end); } - vtime_stop_cpu(); + enabled_wait(); if (clock_saved) local_tick_enable(clock_saved); } while (get_tod_clock_fast() < end); diff --git a/arch/s390/lib/probes.c b/arch/s390/lib/probes.c new file mode 100644 index 000000000000..c5d64a099719 --- /dev/null +++ b/arch/s390/lib/probes.c @@ -0,0 +1,159 @@ +/* + * Common helper functions for kprobes and uprobes + * + * Copyright IBM Corp. 2014 + */ + +#include <linux/kprobes.h> +#include <asm/dis.h> + +int probe_is_prohibited_opcode(u16 *insn) +{ + if (!is_known_insn((unsigned char *)insn)) + return -EINVAL; + switch (insn[0] >> 8) { + case 0x0c: /* bassm */ + case 0x0b: /* bsm */ + case 0x83: /* diag */ + case 0x44: /* ex */ + case 0xac: /* stnsm */ + case 0xad: /* stosm */ + return -EINVAL; + case 0xc6: + switch (insn[0] & 0x0f) { + case 0x00: /* exrl */ + return -EINVAL; + } + } + switch (insn[0]) { + case 0x0101: /* pr */ + case 0xb25a: /* bsa */ + case 0xb240: /* bakr */ + case 0xb258: /* bsg */ + case 0xb218: /* pc */ + case 0xb228: /* pt */ + case 0xb98d: /* epsw */ + case 0xe560: /* tbegin */ + case 0xe561: /* tbeginc */ + case 0xb2f8: /* tend */ + return -EINVAL; + } + return 0; +} + +int probe_get_fixup_type(u16 *insn) +{ + /* default fixup method */ + int fixup = FIXUP_PSW_NORMAL; + + switch (insn[0] >> 8) { + case 0x05: /* balr */ + case 0x0d: /* basr */ + fixup = FIXUP_RETURN_REGISTER; + /* if r2 = 0, no branch will be taken */ + if ((insn[0] & 0x0f) == 0) + fixup |= FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x06: /* bctr */ + case 0x07: /* bcr */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x45: /* bal */ + case 0x4d: /* bas */ + fixup = FIXUP_RETURN_REGISTER; + break; + case 0x47: /* bc */ + case 0x46: /* bct */ + case 0x86: /* bxh */ + case 0x87: /* bxle */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x82: /* lpsw */ + fixup = FIXUP_NOT_REQUIRED; + break; + case 0xb2: /* lpswe */ + if ((insn[0] & 0xff) == 0xb2) + fixup = FIXUP_NOT_REQUIRED; + break; + case 0xa7: /* bras */ + if ((insn[0] & 0x0f) == 0x05) + fixup |= FIXUP_RETURN_REGISTER; + break; + case 0xc0: + if ((insn[0] & 0x0f) == 0x05) /* brasl */ + fixup |= FIXUP_RETURN_REGISTER; + break; + case 0xeb: + switch (insn[2] & 0xff) { + case 0x44: /* bxhg */ + case 0x45: /* bxleg */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } + break; + case 0xe3: /* bctg */ + if ((insn[2] & 0xff) == 0x46) + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0xec: + switch (insn[2] & 0xff) { + case 0xe5: /* clgrb */ + case 0xe6: /* cgrb */ + case 0xf6: /* crb */ + case 0xf7: /* clrb */ + case 0xfc: /* cgib */ + case 0xfd: /* cglib */ + case 0xfe: /* cib */ + case 0xff: /* clib */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } + break; + } + return fixup; +} + +int probe_is_insn_relative_long(u16 *insn) +{ + /* Check if we have a RIL-b or RIL-c format instruction which + * we need to modify in order to avoid instruction emulation. */ + switch (insn[0] >> 8) { + case 0xc0: + if ((insn[0] & 0x0f) == 0x00) /* larl */ + return true; + break; + case 0xc4: + switch (insn[0] & 0x0f) { + case 0x02: /* llhrl */ + case 0x04: /* lghrl */ + case 0x05: /* lhrl */ + case 0x06: /* llghrl */ + case 0x07: /* sthrl */ + case 0x08: /* lgrl */ + case 0x0b: /* stgrl */ + case 0x0c: /* lgfrl */ + case 0x0d: /* lrl */ + case 0x0e: /* llgfrl */ + case 0x0f: /* strl */ + return true; + } + break; + case 0xc6: + switch (insn[0] & 0x0f) { + case 0x02: /* pfdrl */ + case 0x04: /* cghrl */ + case 0x05: /* chrl */ + case 0x06: /* clghrl */ + case 0x07: /* clhrl */ + case 0x08: /* cgrl */ + case 0x0a: /* clgrl */ + case 0x0c: /* cgfrl */ + case 0x0d: /* crl */ + case 0x0e: /* clgfrl */ + case 0x0f: /* clrl */ + return true; + } + break; + } + return false; +} diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 5b0e445bc3f3..034a35a3e9c1 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -98,17 +98,6 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) } EXPORT_SYMBOL(arch_spin_lock_wait_flags); -void arch_spin_relax(arch_spinlock_t *lp) -{ - unsigned int cpu = lp->lock; - if (cpu != 0) { - if (MACHINE_IS_VM || MACHINE_IS_KVM || - !smp_vcpu_scheduled(~cpu)) - smp_yield_cpu(~cpu); - } -} -EXPORT_SYMBOL(arch_spin_relax); - int arch_spin_trylock_retry(arch_spinlock_t *lp) { int count; @@ -122,15 +111,21 @@ EXPORT_SYMBOL(arch_spin_trylock_retry); void _raw_read_lock_wait(arch_rwlock_t *rw) { - unsigned int old; + unsigned int owner, old; int count = spin_retry; +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + __RAW_LOCK(&rw->lock, -1, __RAW_OP_ADD); +#endif + owner = 0; while (1) { if (count-- <= 0) { - smp_yield(); + if (owner && !smp_vcpu_scheduled(~owner)) + smp_yield_cpu(~owner); count = spin_retry; } old = ACCESS_ONCE(rw->lock); + owner = ACCESS_ONCE(rw->owner); if ((int) old < 0) continue; if (_raw_compare_and_swap(&rw->lock, old, old + 1)) @@ -139,28 +134,6 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) } EXPORT_SYMBOL(_raw_read_lock_wait); -void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) -{ - unsigned int old; - int count = spin_retry; - - local_irq_restore(flags); - while (1) { - if (count-- <= 0) { - smp_yield(); - count = spin_retry; - } - old = ACCESS_ONCE(rw->lock); - if ((int) old < 0) - continue; - local_irq_disable(); - if (_raw_compare_and_swap(&rw->lock, old, old + 1)) - return; - local_irq_restore(flags); - } -} -EXPORT_SYMBOL(_raw_read_lock_wait_flags); - int _raw_read_trylock_retry(arch_rwlock_t *rw) { unsigned int old; @@ -177,46 +150,62 @@ int _raw_read_trylock_retry(arch_rwlock_t *rw) } EXPORT_SYMBOL(_raw_read_trylock_retry); -void _raw_write_lock_wait(arch_rwlock_t *rw) +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + +void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev) { - unsigned int old; + unsigned int owner, old; int count = spin_retry; + owner = 0; while (1) { if (count-- <= 0) { - smp_yield(); + if (owner && !smp_vcpu_scheduled(~owner)) + smp_yield_cpu(~owner); count = spin_retry; } old = ACCESS_ONCE(rw->lock); - if (old) - continue; - if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) - return; + owner = ACCESS_ONCE(rw->owner); + smp_rmb(); + if ((int) old >= 0) { + prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); + old = prev; + } + if ((old & 0x7fffffff) == 0 && (int) prev >= 0) + break; } } EXPORT_SYMBOL(_raw_write_lock_wait); -void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) +#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +void _raw_write_lock_wait(arch_rwlock_t *rw) { - unsigned int old; + unsigned int owner, old, prev; int count = spin_retry; - local_irq_restore(flags); + prev = 0x80000000; + owner = 0; while (1) { if (count-- <= 0) { - smp_yield(); + if (owner && !smp_vcpu_scheduled(~owner)) + smp_yield_cpu(~owner); count = spin_retry; } old = ACCESS_ONCE(rw->lock); - if (old) - continue; - local_irq_disable(); - if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) - return; - local_irq_restore(flags); + owner = ACCESS_ONCE(rw->owner); + if ((int) old >= 0 && + _raw_compare_and_swap(&rw->lock, old, old | 0x80000000)) + prev = old; + else + smp_rmb(); + if ((old & 0x7fffffff) == 0 && (int) prev >= 0) + break; } } -EXPORT_SYMBOL(_raw_write_lock_wait_flags); +EXPORT_SYMBOL(_raw_write_lock_wait); + +#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ int _raw_write_trylock_retry(arch_rwlock_t *rw) { @@ -233,3 +222,13 @@ int _raw_write_trylock_retry(arch_rwlock_t *rw) return 0; } EXPORT_SYMBOL(_raw_write_trylock_retry); + +void arch_lock_relax(unsigned int cpu) +{ + if (!cpu) + return; + if (MACHINE_IS_LPAR && smp_vcpu_scheduled(~cpu)) + return; + smp_yield_cpu(~cpu); +} +EXPORT_SYMBOL(arch_lock_relax); diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 46d517c3c763..d46cadeda204 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -54,7 +54,6 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level) return; } seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW "); - seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : " "); seq_putc(m, '\n'); } @@ -129,7 +128,7 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, } #ifdef CONFIG_64BIT -#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO) +#define _PMD_PROT_MASK _SEGMENT_ENTRY_PROTECT #else #define _PMD_PROT_MASK 0 #endif @@ -157,7 +156,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, } #ifdef CONFIG_64BIT -#define _PUD_PROT_MASK (_REGION3_ENTRY_RO | _REGION3_ENTRY_CO) +#define _PUD_PROT_MASK _REGION3_ENTRY_RO #else #define _PUD_PROT_MASK 0 #endif diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 389bc17934b7..3c80d2e38f03 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -88,7 +88,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; pmd_val(pmd) |= pte_page(pte)[1].index; } else - pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO; + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; *(pmd_t *) ptep = pmd; } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 8400f494623f..3fef3b299665 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -6,6 +6,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <asm/cacheflush.h> +#include <asm/facility.h> #include <asm/pgtable.h> #include <asm/page.h> @@ -103,27 +104,50 @@ int set_memory_x(unsigned long addr, int numpages) } #ifdef CONFIG_DEBUG_PAGEALLOC + +static void ipte_range(pte_t *pte, unsigned long address, int nr) +{ + int i; + + if (test_facility(13) && IS_ENABLED(CONFIG_64BIT)) { + __ptep_ipte_range(address, nr - 1, pte); + return; + } + for (i = 0; i < nr; i++) { + __ptep_ipte(address, pte); + address += PAGE_SIZE; + pte++; + } +} + void kernel_map_pages(struct page *page, int numpages, int enable) { unsigned long address; + int nr, i, j; pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; - int i; - for (i = 0; i < numpages; i++) { + for (i = 0; i < numpages;) { address = page_to_phys(page + i); pgd = pgd_offset_k(address); pud = pud_offset(pgd, address); pmd = pmd_offset(pud, address); pte = pte_offset_kernel(pmd, address); - if (!enable) { - __ptep_ipte(address, pte); - pte_val(*pte) = _PAGE_INVALID; - continue; + nr = (unsigned long)pte >> ilog2(sizeof(long)); + nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1)); + nr = min(numpages - i, nr); + if (enable) { + for (j = 0; j < nr; j++) { + pte_val(*pte) = __pa(address); + address += PAGE_SIZE; + pte++; + } + } else { + ipte_range(pte, address, nr); } - pte_val(*pte) = __pa(address); + i += nr; } } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index fdbd7888cb07..b1593c2f751a 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -236,8 +236,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) if (!new_page) goto out; pmd_val(*pm_dir) = __pa(new_page) | - _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | - _SEGMENT_ENTRY_CO; + _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE; address = (address + PMD_SIZE) & PMD_MASK; continue; } @@ -253,9 +252,9 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) pt_dir = pte_offset_kernel(pm_dir, address); if (pte_none(*pt_dir)) { - unsigned long new_page; + void *new_page; - new_page =__pa(vmem_alloc_pages(0)); + new_page = vmemmap_alloc_block(PAGE_SIZE, node); if (!new_page) goto out; pte_val(*pt_dir) = @@ -263,7 +262,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) } address += PAGE_SIZE; } - memset((void *)start, 0, end - start); ret = 0; out: return ret; diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h index a273c88578fc..97a5fda83450 100644 --- a/arch/sh/include/asm/atomic-grb.h +++ b/arch/sh/include/asm/atomic-grb.h @@ -1,85 +1,56 @@ #ifndef __ASM_SH_ATOMIC_GRB_H #define __ASM_SH_ATOMIC_GRB_H -static inline void atomic_add(int i, atomic_t *v) -{ - int tmp; - - __asm__ __volatile__ ( - " .align 2 \n\t" - " mova 1f, r0 \n\t" /* r0 = end point */ - " mov r15, r1 \n\t" /* r1 = saved sp */ - " mov #-6, r15 \n\t" /* LOGIN: r15 = size */ - " mov.l @%1, %0 \n\t" /* load old value */ - " add %2, %0 \n\t" /* add */ - " mov.l %0, @%1 \n\t" /* store new value */ - "1: mov r1, r15 \n\t" /* LOGOUT */ - : "=&r" (tmp), - "+r" (v) - : "r" (i) - : "memory" , "r0", "r1"); -} - -static inline void atomic_sub(int i, atomic_t *v) -{ - int tmp; - - __asm__ __volatile__ ( - " .align 2 \n\t" - " mova 1f, r0 \n\t" /* r0 = end point */ - " mov r15, r1 \n\t" /* r1 = saved sp */ - " mov #-6, r15 \n\t" /* LOGIN: r15 = size */ - " mov.l @%1, %0 \n\t" /* load old value */ - " sub %2, %0 \n\t" /* sub */ - " mov.l %0, @%1 \n\t" /* store new value */ - "1: mov r1, r15 \n\t" /* LOGOUT */ - : "=&r" (tmp), - "+r" (v) - : "r" (i) - : "memory" , "r0", "r1"); -} - -static inline int atomic_add_return(int i, atomic_t *v) -{ - int tmp; +#define ATOMIC_OP(op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + int tmp; \ + \ + __asm__ __volatile__ ( \ + " .align 2 \n\t" \ + " mova 1f, r0 \n\t" /* r0 = end point */ \ + " mov r15, r1 \n\t" /* r1 = saved sp */ \ + " mov #-6, r15 \n\t" /* LOGIN: r15 = size */ \ + " mov.l @%1, %0 \n\t" /* load old value */ \ + " " #op " %2, %0 \n\t" /* $op */ \ + " mov.l %0, @%1 \n\t" /* store new value */ \ + "1: mov r1, r15 \n\t" /* LOGOUT */ \ + : "=&r" (tmp), \ + "+r" (v) \ + : "r" (i) \ + : "memory" , "r0", "r1"); \ +} \ - __asm__ __volatile__ ( - " .align 2 \n\t" - " mova 1f, r0 \n\t" /* r0 = end point */ - " mov r15, r1 \n\t" /* r1 = saved sp */ - " mov #-6, r15 \n\t" /* LOGIN: r15 = size */ - " mov.l @%1, %0 \n\t" /* load old value */ - " add %2, %0 \n\t" /* add */ - " mov.l %0, @%1 \n\t" /* store new value */ - "1: mov r1, r15 \n\t" /* LOGOUT */ - : "=&r" (tmp), - "+r" (v) - : "r" (i) - : "memory" , "r0", "r1"); - - return tmp; +#define ATOMIC_OP_RETURN(op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + int tmp; \ + \ + __asm__ __volatile__ ( \ + " .align 2 \n\t" \ + " mova 1f, r0 \n\t" /* r0 = end point */ \ + " mov r15, r1 \n\t" /* r1 = saved sp */ \ + " mov #-6, r15 \n\t" /* LOGIN: r15 = size */ \ + " mov.l @%1, %0 \n\t" /* load old value */ \ + " " #op " %2, %0 \n\t" /* $op */ \ + " mov.l %0, @%1 \n\t" /* store new value */ \ + "1: mov r1, r15 \n\t" /* LOGOUT */ \ + : "=&r" (tmp), \ + "+r" (v) \ + : "r" (i) \ + : "memory" , "r0", "r1"); \ + \ + return tmp; \ } -static inline int atomic_sub_return(int i, atomic_t *v) -{ - int tmp; +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) - __asm__ __volatile__ ( - " .align 2 \n\t" - " mova 1f, r0 \n\t" /* r0 = end point */ - " mov r15, r1 \n\t" /* r1 = saved sp */ - " mov #-6, r15 \n\t" /* LOGIN: r15 = size */ - " mov.l @%1, %0 \n\t" /* load old value */ - " sub %2, %0 \n\t" /* sub */ - " mov.l %0, @%1 \n\t" /* store new value */ - "1: mov r1, r15 \n\t" /* LOGOUT */ - : "=&r" (tmp), - "+r" (v) - : "r" (i) - : "memory", "r0", "r1"); +ATOMIC_OPS(add) +ATOMIC_OPS(sub) - return tmp; -} +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP static inline void atomic_clear_mask(unsigned int mask, atomic_t *v) { diff --git a/arch/sh/include/asm/atomic-irq.h b/arch/sh/include/asm/atomic-irq.h index 9f7c56609e53..61d107523f06 100644 --- a/arch/sh/include/asm/atomic-irq.h +++ b/arch/sh/include/asm/atomic-irq.h @@ -8,49 +8,39 @@ * forward to code at the end of this object's .text section, then * branch back to restart the operation. */ -static inline void atomic_add(int i, atomic_t *v) -{ - unsigned long flags; - - raw_local_irq_save(flags); - v->counter += i; - raw_local_irq_restore(flags); -} -static inline void atomic_sub(int i, atomic_t *v) -{ - unsigned long flags; - - raw_local_irq_save(flags); - v->counter -= i; - raw_local_irq_restore(flags); +#define ATOMIC_OP(op, c_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ } -static inline int atomic_add_return(int i, atomic_t *v) -{ - unsigned long temp, flags; - - raw_local_irq_save(flags); - temp = v->counter; - temp += i; - v->counter = temp; - raw_local_irq_restore(flags); - - return temp; +#define ATOMIC_OP_RETURN(op, c_op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned long temp, flags; \ + \ + raw_local_irq_save(flags); \ + temp = v->counter; \ + temp c_op i; \ + v->counter = temp; \ + raw_local_irq_restore(flags); \ + \ + return temp; \ } -static inline int atomic_sub_return(int i, atomic_t *v) -{ - unsigned long temp, flags; +#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) - raw_local_irq_save(flags); - temp = v->counter; - temp -= i; - v->counter = temp; - raw_local_irq_restore(flags); +ATOMIC_OPS(add, +=) +ATOMIC_OPS(sub, -=) - return temp; -} +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP static inline void atomic_clear_mask(unsigned int mask, atomic_t *v) { diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h index 4b00b78e3f4f..8575dccb9ef7 100644 --- a/arch/sh/include/asm/atomic-llsc.h +++ b/arch/sh/include/asm/atomic-llsc.h @@ -2,39 +2,6 @@ #define __ASM_SH_ATOMIC_LLSC_H /* - * To get proper branch prediction for the main line, we must branch - * forward to code at the end of this object's .text section, then - * branch back to restart the operation. - */ -static inline void atomic_add(int i, atomic_t *v) -{ - unsigned long tmp; - - __asm__ __volatile__ ( -"1: movli.l @%2, %0 ! atomic_add \n" -" add %1, %0 \n" -" movco.l %0, @%2 \n" -" bf 1b \n" - : "=&z" (tmp) - : "r" (i), "r" (&v->counter) - : "t"); -} - -static inline void atomic_sub(int i, atomic_t *v) -{ - unsigned long tmp; - - __asm__ __volatile__ ( -"1: movli.l @%2, %0 ! atomic_sub \n" -" sub %1, %0 \n" -" movco.l %0, @%2 \n" -" bf 1b \n" - : "=&z" (tmp) - : "r" (i), "r" (&v->counter) - : "t"); -} - -/* * SH-4A note: * * We basically get atomic_xxx_return() for free compared with @@ -42,39 +9,53 @@ static inline void atomic_sub(int i, atomic_t *v) * encoding, so the retval is automatically set without having to * do any special work. */ -static inline int atomic_add_return(int i, atomic_t *v) -{ - unsigned long temp; +/* + * To get proper branch prediction for the main line, we must branch + * forward to code at the end of this object's .text section, then + * branch back to restart the operation. + */ - __asm__ __volatile__ ( -"1: movli.l @%2, %0 ! atomic_add_return \n" -" add %1, %0 \n" -" movco.l %0, @%2 \n" -" bf 1b \n" -" synco \n" - : "=&z" (temp) - : "r" (i), "r" (&v->counter) - : "t"); +#define ATOMIC_OP(op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned long tmp; \ + \ + __asm__ __volatile__ ( \ +"1: movli.l @%2, %0 ! atomic_" #op "\n" \ +" " #op " %1, %0 \n" \ +" movco.l %0, @%2 \n" \ +" bf 1b \n" \ + : "=&z" (tmp) \ + : "r" (i), "r" (&v->counter) \ + : "t"); \ +} - return temp; +#define ATOMIC_OP_RETURN(op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned long temp; \ + \ + __asm__ __volatile__ ( \ +"1: movli.l @%2, %0 ! atomic_" #op "_return \n" \ +" " #op " %1, %0 \n" \ +" movco.l %0, @%2 \n" \ +" bf 1b \n" \ +" synco \n" \ + : "=&z" (temp) \ + : "r" (i), "r" (&v->counter) \ + : "t"); \ + \ + return temp; \ } -static inline int atomic_sub_return(int i, atomic_t *v) -{ - unsigned long temp; +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) - __asm__ __volatile__ ( -"1: movli.l @%2, %0 ! atomic_sub_return \n" -" sub %1, %0 \n" -" movco.l %0, @%2 \n" -" bf 1b \n" -" synco \n" - : "=&z" (temp) - : "r" (i), "r" (&v->counter) - : "t"); +ATOMIC_OPS(add) +ATOMIC_OPS(sub) - return temp; -} +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP static inline void atomic_clear_mask(unsigned int mask, atomic_t *v) { diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index f57b8a6743b3..05b9f74ce2d5 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -14,7 +14,7 @@ #define ATOMIC_INIT(i) { (i) } -#define atomic_read(v) (*(volatile int *)&(v)->counter) +#define atomic_read(v) ACCESS_ONCE((v)->counter) #define atomic_set(v,i) ((v)->counter = (i)) #if defined(CONFIG_GUSA_RB) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index a537816613f9..96ac69c5eba0 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -67,6 +67,7 @@ config SPARC64 select HAVE_SYSCALL_TRACEPOINTS select HAVE_CONTEXT_TRACKING select HAVE_DEBUG_KMEMLEAK + select SPARSE_IRQ select RTC_DRV_CMOS select RTC_DRV_BQ4802 select RTC_DRV_SUN4V diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 7aed2be45b44..765c1776ec9f 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -20,23 +20,22 @@ #define ATOMIC_INIT(i) { (i) } -int __atomic_add_return(int, atomic_t *); +int atomic_add_return(int, atomic_t *); int atomic_cmpxchg(atomic_t *, int, int); #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) int __atomic_add_unless(atomic_t *, int, int); void atomic_set(atomic_t *, int); -#define atomic_read(v) (*(volatile int *)&(v)->counter) +#define atomic_read(v) ACCESS_ONCE((v)->counter) -#define atomic_add(i, v) ((void)__atomic_add_return( (int)(i), (v))) -#define atomic_sub(i, v) ((void)__atomic_add_return(-(int)(i), (v))) -#define atomic_inc(v) ((void)__atomic_add_return( 1, (v))) -#define atomic_dec(v) ((void)__atomic_add_return( -1, (v))) +#define atomic_add(i, v) ((void)atomic_add_return( (int)(i), (v))) +#define atomic_sub(i, v) ((void)atomic_add_return(-(int)(i), (v))) +#define atomic_inc(v) ((void)atomic_add_return( 1, (v))) +#define atomic_dec(v) ((void)atomic_add_return( -1, (v))) -#define atomic_add_return(i, v) (__atomic_add_return( (int)(i), (v))) -#define atomic_sub_return(i, v) (__atomic_add_return(-(int)(i), (v))) -#define atomic_inc_return(v) (__atomic_add_return( 1, (v))) -#define atomic_dec_return(v) (__atomic_add_return( -1, (v))) +#define atomic_sub_return(i, v) (atomic_add_return(-(int)(i), (v))) +#define atomic_inc_return(v) (atomic_add_return( 1, (v))) +#define atomic_dec_return(v) (atomic_add_return( -1, (v))) #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index bb894c8bec56..4082749913ce 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -14,33 +14,34 @@ #define ATOMIC_INIT(i) { (i) } #define ATOMIC64_INIT(i) { (i) } -#define atomic_read(v) (*(volatile int *)&(v)->counter) -#define atomic64_read(v) (*(volatile long *)&(v)->counter) +#define atomic_read(v) ACCESS_ONCE((v)->counter) +#define atomic64_read(v) ACCESS_ONCE((v)->counter) #define atomic_set(v, i) (((v)->counter) = i) #define atomic64_set(v, i) (((v)->counter) = i) -void atomic_add(int, atomic_t *); -void atomic64_add(long, atomic64_t *); -void atomic_sub(int, atomic_t *); -void atomic64_sub(long, atomic64_t *); +#define ATOMIC_OP(op) \ +void atomic_##op(int, atomic_t *); \ +void atomic64_##op(long, atomic64_t *); -int atomic_add_ret(int, atomic_t *); -long atomic64_add_ret(long, atomic64_t *); -int atomic_sub_ret(int, atomic_t *); -long atomic64_sub_ret(long, atomic64_t *); +#define ATOMIC_OP_RETURN(op) \ +int atomic_##op##_return(int, atomic_t *); \ +long atomic64_##op##_return(long, atomic64_t *); -#define atomic_dec_return(v) atomic_sub_ret(1, v) -#define atomic64_dec_return(v) atomic64_sub_ret(1, v) +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) -#define atomic_inc_return(v) atomic_add_ret(1, v) -#define atomic64_inc_return(v) atomic64_add_ret(1, v) +ATOMIC_OPS(add) +ATOMIC_OPS(sub) -#define atomic_sub_return(i, v) atomic_sub_ret(i, v) -#define atomic64_sub_return(i, v) atomic64_sub_ret(i, v) +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP -#define atomic_add_return(i, v) atomic_add_ret(i, v) -#define atomic64_add_return(i, v) atomic64_add_ret(i, v) +#define atomic_dec_return(v) atomic_sub_return(1, v) +#define atomic64_dec_return(v) atomic64_sub_return(1, v) + +#define atomic_inc_return(v) atomic_add_return(1, v) +#define atomic64_inc_return(v) atomic64_add_return(1, v) /* * atomic_inc_and_test - increment and test @@ -53,11 +54,11 @@ long atomic64_sub_ret(long, atomic64_t *); #define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) -#define atomic_sub_and_test(i, v) (atomic_sub_ret(i, v) == 0) -#define atomic64_sub_and_test(i, v) (atomic64_sub_ret(i, v) == 0) +#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) +#define atomic64_sub_and_test(i, v) (atomic64_sub_return(i, v) == 0) -#define atomic_dec_and_test(v) (atomic_sub_ret(1, v) == 0) -#define atomic64_dec_and_test(v) (atomic64_sub_ret(1, v) == 0) +#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) +#define atomic64_dec_and_test(v) (atomic64_sub_return(1, v) == 0) #define atomic_inc(v) atomic_add(1, v) #define atomic64_inc(v) atomic64_add(1, v) @@ -65,8 +66,8 @@ long atomic64_sub_ret(long, atomic64_t *); #define atomic_dec(v) atomic_sub(1, v) #define atomic64_dec(v) atomic64_sub(1, v) -#define atomic_add_negative(i, v) (atomic_add_ret(i, v) < 0) -#define atomic64_add_negative(i, v) (atomic64_add_ret(i, v) < 0) +#define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0) +#define atomic64_add_negative(i, v) (atomic64_add_return(i, v) < 0) #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index 1ee02710b2dc..5b1b52a04ad6 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -20,10 +20,12 @@ extern struct bus_type pci_bus_type; static inline struct dma_map_ops *get_dma_ops(struct device *dev) { -#if defined(CONFIG_SPARC32) && defined(CONFIG_PCI) +#ifdef CONFIG_SPARC_LEON if (sparc_cpu_model == sparc_leon) return leon_dma_ops; - else if (dev->bus == &pci_bus_type) +#endif +#if defined(CONFIG_SPARC32) && defined(CONFIG_PCI) + if (dev->bus == &pci_bus_type) return &pci32_dma_ops; #endif return dma_ops; diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index 94b39caea3eb..4f6725ff4c33 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -2947,6 +2947,16 @@ unsigned long sun4v_vt_set_perfreg(unsigned long reg_num, unsigned long reg_val); #endif +#define HV_FAST_T5_GET_PERFREG 0x1a8 +#define HV_FAST_T5_SET_PERFREG 0x1a9 + +#ifndef __ASSEMBLY__ +unsigned long sun4v_t5_get_perfreg(unsigned long reg_num, + unsigned long *reg_val); +unsigned long sun4v_t5_set_perfreg(unsigned long reg_num, + unsigned long reg_val); +#endif + /* Function numbers for HV_CORE_TRAP. */ #define HV_CORE_SET_VER 0x00 #define HV_CORE_PUTCHAR 0x01 @@ -2978,6 +2988,7 @@ unsigned long sun4v_vt_set_perfreg(unsigned long reg_num, #define HV_GRP_VF_CPU 0x0205 #define HV_GRP_KT_CPU 0x0209 #define HV_GRP_VT_CPU 0x020c +#define HV_GRP_T5_CPU 0x0211 #define HV_GRP_DIAG 0x0300 #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index 91d219381306..3f70f900e834 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -37,7 +37,7 @@ * * ino_bucket->irq allocation is made during {sun4v_,}build_irq(). */ -#define NR_IRQS 255 +#define NR_IRQS (2048) void irq_install_pre_handler(int irq, void (*func)(unsigned int, void *, void *), @@ -57,11 +57,8 @@ unsigned int sun4u_build_msi(u32 portid, unsigned int *irq_p, unsigned long iclr_base); void sun4u_destroy_msi(unsigned int irq); -unsigned char irq_alloc(unsigned int dev_handle, - unsigned int dev_ino); -#ifdef CONFIG_PCI_MSI +unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino); void irq_free(unsigned int irq); -#endif void __init init_IRQ(void); void fixup_irqs(void); diff --git a/arch/sparc/include/asm/ldc.h b/arch/sparc/include/asm/ldc.h index c8c67f621f4f..58ab64de25d2 100644 --- a/arch/sparc/include/asm/ldc.h +++ b/arch/sparc/include/asm/ldc.h @@ -53,13 +53,14 @@ struct ldc_channel; /* Allocate state for a channel. */ struct ldc_channel *ldc_alloc(unsigned long id, const struct ldc_channel_config *cfgp, - void *event_arg); + void *event_arg, + const char *name); /* Shut down and free state for a channel. */ void ldc_free(struct ldc_channel *lp); /* Register TX and RX queues of the link with the hypervisor. */ -int ldc_bind(struct ldc_channel *lp, const char *name); +int ldc_bind(struct ldc_channel *lp); /* For non-RAW protocols we need to complete a handshake before * communication can proceed. ldc_connect() does that, if the diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index bf109984a032..8c2a8c937540 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -57,18 +57,21 @@ void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *topa typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long iopte; } iopte_t; typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pud; } pud_t; typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; #define pte_val(x) ((x).pte) #define iopte_val(x) ((x).iopte) #define pmd_val(x) ((x).pmd) +#define pud_val(x) ((x).pud) #define pgd_val(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) #define __pte(x) ((pte_t) { (x) } ) #define __iopte(x) ((iopte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) +#define __pud(x) ((pud_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) @@ -77,18 +80,21 @@ typedef struct { unsigned long pgprot; } pgprot_t; typedef unsigned long pte_t; typedef unsigned long iopte_t; typedef unsigned long pmd_t; +typedef unsigned long pud_t; typedef unsigned long pgd_t; typedef unsigned long pgprot_t; #define pte_val(x) (x) #define iopte_val(x) (x) #define pmd_val(x) (x) +#define pud_val(x) (x) #define pgd_val(x) (x) #define pgprot_val(x) (x) #define __pte(x) (x) #define __iopte(x) (x) #define __pmd(x) (x) +#define __pud(x) (x) #define __pgd(x) (x) #define __pgprot(x) (x) @@ -96,21 +102,14 @@ typedef unsigned long pgprot_t; typedef pte_t *pgtable_t; -/* These two values define the virtual address space range in which we - * must forbid 64-bit user processes from making mappings. It used to - * represent precisely the virtual address space hole present in most - * early sparc64 chips including UltraSPARC-I. But now it also is - * further constrained by the limits of our page tables, which is - * 43-bits of virtual address. - */ -#define SPARC64_VA_HOLE_TOP _AC(0xfffffc0000000000,UL) -#define SPARC64_VA_HOLE_BOTTOM _AC(0x0000040000000000,UL) +extern unsigned long sparc64_va_hole_top; +extern unsigned long sparc64_va_hole_bottom; /* The next two defines specify the actual exclusion region we * enforce, wherein we use a 4GB red zone on each side of the VA hole. */ -#define VA_EXCLUDE_START (SPARC64_VA_HOLE_BOTTOM - (1UL << 32UL)) -#define VA_EXCLUDE_END (SPARC64_VA_HOLE_TOP + (1UL << 32UL)) +#define VA_EXCLUDE_START (sparc64_va_hole_bottom - (1UL << 32UL)) +#define VA_EXCLUDE_END (sparc64_va_hole_top + (1UL << 32UL)) #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \ _AC(0x0000000070000000,UL) : \ @@ -118,20 +117,16 @@ typedef pte_t *pgtable_t; #include <asm-generic/memory_model.h> -#define PAGE_OFFSET_BY_BITS(X) (-(_AC(1,UL) << (X))) extern unsigned long PAGE_OFFSET; #endif /* !(__ASSEMBLY__) */ -/* The maximum number of physical memory address bits we support, this - * is used to size various tables used to manage kernel TLB misses and - * also the sparsemem code. +/* The maximum number of physical memory address bits we support. The + * largest value we can support is whatever "KPGD_SHIFT + KPTE_BITS" + * evaluates to. */ -#define MAX_PHYS_ADDRESS_BITS 47 +#define MAX_PHYS_ADDRESS_BITS 53 -/* These two shift counts are used when indexing sparc64_valid_addr_bitmap - * and kpte_linear_bitmap. - */ #define ILOG2_4MB 22 #define ILOG2_256MB 28 diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index 39a7ac49b00c..5e3187185b4a 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -15,6 +15,13 @@ extern struct kmem_cache *pgtable_cache; +static inline void __pgd_populate(pgd_t *pgd, pud_t *pud) +{ + pgd_set(pgd, pud); +} + +#define pgd_populate(MM, PGD, PUD) __pgd_populate(PGD, PUD) + static inline pgd_t *pgd_alloc(struct mm_struct *mm) { return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); @@ -25,7 +32,23 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) kmem_cache_free(pgtable_cache, pgd); } -#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) +static inline void __pud_populate(pud_t *pud, pmd_t *pmd) +{ + pud_set(pud, pmd); +} + +#define pud_populate(MM, PUD, PMD) __pud_populate(PUD, PMD) + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return kmem_cache_alloc(pgtable_cache, + GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + kmem_cache_free(pgtable_cache, pud); +} static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -91,4 +114,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pte_t *pte, #define __pmd_free_tlb(tlb, pmd, addr) \ pgtable_free_tlb(tlb, pmd, false) +#define __pud_free_tlb(tlb, pud, addr) \ + pgtable_free_tlb(tlb, pud, false) + #endif /* _SPARC64_PGALLOC_H */ diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 3770bf5c6e1b..bfeb626085ac 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -20,8 +20,6 @@ #include <asm/page.h> #include <asm/processor.h> -#include <asm-generic/pgtable-nopud.h> - /* The kernel image occupies 0x4000000 to 0x6000000 (4MB --> 96MB). * The page copy blockops can use 0x6000000 to 0x8000000. * The 8K TSB is mapped in the 0x8000000 to 0x8400000 range. @@ -42,10 +40,7 @@ #define LOW_OBP_ADDRESS _AC(0x00000000f0000000,UL) #define HI_OBP_ADDRESS _AC(0x0000000100000000,UL) #define VMALLOC_START _AC(0x0000000100000000,UL) -#define VMALLOC_END _AC(0x0000010000000000,UL) -#define VMEMMAP_BASE _AC(0x0000010000000000,UL) - -#define vmemmap ((struct page *)VMEMMAP_BASE) +#define VMEMMAP_BASE VMALLOC_END /* PMD_SHIFT determines the size of the area a second-level page * table can map @@ -55,13 +50,25 @@ #define PMD_MASK (~(PMD_SIZE-1)) #define PMD_BITS (PAGE_SHIFT - 3) -/* PGDIR_SHIFT determines what a third-level page table entry can map */ -#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS) +/* PUD_SHIFT determines the size of the area a third-level page + * table can map + */ +#define PUD_SHIFT (PMD_SHIFT + PMD_BITS) +#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) +#define PUD_BITS (PAGE_SHIFT - 3) + +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ +#define PGDIR_SHIFT (PUD_SHIFT + PUD_BITS) #define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PGDIR_BITS (PAGE_SHIFT - 3) -#if (PGDIR_SHIFT + PGDIR_BITS) != 43 +#if (MAX_PHYS_ADDRESS_BITS > PGDIR_SHIFT + PGDIR_BITS) +#error MAX_PHYS_ADDRESS_BITS exceeds what kernel page tables can support +#endif + +#if (PGDIR_SHIFT + PGDIR_BITS) != 53 #error Page table parameters do not cover virtual address space properly. #endif @@ -71,28 +78,18 @@ #ifndef __ASSEMBLY__ -#include <linux/sched.h> - -extern unsigned long sparc64_valid_addr_bitmap[]; +extern unsigned long VMALLOC_END; -/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ -static inline bool __kern_addr_valid(unsigned long paddr) -{ - if ((paddr >> MAX_PHYS_ADDRESS_BITS) != 0UL) - return false; - return test_bit(paddr >> ILOG2_4MB, sparc64_valid_addr_bitmap); -} +#define vmemmap ((struct page *)VMEMMAP_BASE) -static inline bool kern_addr_valid(unsigned long addr) -{ - unsigned long paddr = __pa(addr); +#include <linux/sched.h> - return __kern_addr_valid(paddr); -} +bool kern_addr_valid(unsigned long addr); /* Entries per page directory level. */ #define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3)) #define PTRS_PER_PMD (1UL << PMD_BITS) +#define PTRS_PER_PUD (1UL << PUD_BITS) #define PTRS_PER_PGD (1UL << PGDIR_BITS) /* Kernel has a separate 44bit address space. */ @@ -101,6 +98,9 @@ static inline bool kern_addr_valid(unsigned long addr) #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %p(%016lx) seen at (%pS)\n", \ __FILE__, __LINE__, &(e), pmd_val(e), __builtin_return_address(0)) +#define pud_ERROR(e) \ + pr_err("%s:%d: bad pud %p(%016lx) seen at (%pS)\n", \ + __FILE__, __LINE__, &(e), pud_val(e), __builtin_return_address(0)) #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %p(%016lx) seen at (%pS)\n", \ __FILE__, __LINE__, &(e), pgd_val(e), __builtin_return_address(0)) @@ -112,6 +112,7 @@ static inline bool kern_addr_valid(unsigned long addr) #define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/ #define _PAGE_SPECIAL _AC(0x0200000000000000,UL) /* Special page */ #define _PAGE_PMD_HUGE _AC(0x0100000000000000,UL) /* Huge page */ +#define _PAGE_PUD_HUGE _PAGE_PMD_HUGE /* Advertise support for _PAGE_SPECIAL */ #define __HAVE_ARCH_PTE_SPECIAL @@ -658,26 +659,26 @@ static inline unsigned long pmd_large(pmd_t pmd) return pte_val(pte) & _PAGE_PMD_HUGE; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline unsigned long pmd_young(pmd_t pmd) +static inline unsigned long pmd_pfn(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); - return pte_young(pte); + return pte_pfn(pte); } -static inline unsigned long pmd_write(pmd_t pmd) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline unsigned long pmd_young(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); - return pte_write(pte); + return pte_young(pte); } -static inline unsigned long pmd_pfn(pmd_t pmd) +static inline unsigned long pmd_write(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); - return pte_pfn(pte); + return pte_write(pte); } static inline unsigned long pmd_trans_huge(pmd_t pmd) @@ -771,13 +772,15 @@ static inline int pmd_present(pmd_t pmd) * the top bits outside of the range of any physical address size we * support are clear as well. We also validate the physical itself. */ -#define pmd_bad(pmd) ((pmd_val(pmd) & ~PAGE_MASK) || \ - !__kern_addr_valid(pmd_val(pmd))) +#define pmd_bad(pmd) (pmd_val(pmd) & ~PAGE_MASK) #define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) ((pud_val(pud) & ~PAGE_MASK) || \ - !__kern_addr_valid(pud_val(pud))) +#define pud_bad(pud) (pud_val(pud) & ~PAGE_MASK) + +#define pgd_none(pgd) (!pgd_val(pgd)) + +#define pgd_bad(pgd) (pgd_val(pgd) & ~PAGE_MASK) #ifdef CONFIG_TRANSPARENT_HUGEPAGE void set_pmd_at(struct mm_struct *mm, unsigned long addr, @@ -815,10 +818,31 @@ static inline unsigned long __pmd_page(pmd_t pmd) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) #define pud_present(pud) (pud_val(pud) != 0U) #define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) +#define pgd_page_vaddr(pgd) \ + ((unsigned long) __va(pgd_val(pgd))) +#define pgd_present(pgd) (pgd_val(pgd) != 0U) +#define pgd_clear(pgdp) (pgd_val(*(pgd)) = 0UL) + +static inline unsigned long pud_large(pud_t pud) +{ + pte_t pte = __pte(pud_val(pud)); + + return pte_val(pte) & _PAGE_PMD_HUGE; +} + +static inline unsigned long pud_pfn(pud_t pud) +{ + pte_t pte = __pte(pud_val(pud)); + + return pte_pfn(pte); +} /* Same in both SUN4V and SUN4U. */ #define pte_none(pte) (!pte_val(pte)) +#define pgd_set(pgdp, pudp) \ + (pgd_val(*(pgdp)) = (__pa((unsigned long) (pudp)))) + /* to find an entry in a page-table-directory. */ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) @@ -826,6 +850,11 @@ static inline unsigned long __pmd_page(pmd_t pmd) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) +/* Find an entry in the third-level page table.. */ +#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +#define pud_offset(pgdp, address) \ + ((pud_t *) pgd_page_vaddr(*(pgdp)) + pud_index(address)) + /* Find an entry in the second-level page table.. */ #define pmd_offset(pudp, address) \ ((pmd_t *) pud_page_vaddr(*(pudp)) + \ @@ -898,7 +927,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, #endif extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern pmd_t swapper_low_pmd_dir[PTRS_PER_PMD]; void paging_init(void); unsigned long find_ecache_flush_span(unsigned long size); diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h index 3fc58691dbd0..56f933816144 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -45,6 +45,8 @@ #define SUN4V_CHIP_NIAGARA3 0x03 #define SUN4V_CHIP_NIAGARA4 0x04 #define SUN4V_CHIP_NIAGARA5 0x05 +#define SUN4V_CHIP_SPARC_M6 0x06 +#define SUN4V_CHIP_SPARC_M7 0x07 #define SUN4V_CHIP_SPARC64X 0x8a #define SUN4V_CHIP_UNKNOWN 0xff diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index a5f01ac6d0f1..f85dc8512ab3 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -102,6 +102,7 @@ struct thread_info { #define FAULT_CODE_ITLB 0x04 /* Miss happened in I-TLB */ #define FAULT_CODE_WINFIXUP 0x08 /* Miss happened during spill/fill */ #define FAULT_CODE_BLKCOMMIT 0x10 /* Use blk-commit ASI in copy_page */ +#define FAULT_CODE_BAD_RA 0x20 /* Bad RA for sun4v */ #if PAGE_SHIFT == 13 #define THREAD_SIZE (2*PAGE_SIZE) diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h index 90916f955cac..ecb49cfa3be9 100644 --- a/arch/sparc/include/asm/tsb.h +++ b/arch/sparc/include/asm/tsb.h @@ -133,9 +133,24 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; sub TSB, 0x8, TSB; \ TSB_STORE(TSB, TAG); - /* Do a kernel page table walk. Leaves physical PTE pointer in - * REG1. Jumps to FAIL_LABEL on early page table walk termination. - * VADDR will not be clobbered, but REG2 will. + /* Do a kernel page table walk. Leaves valid PTE value in + * REG1. Jumps to FAIL_LABEL on early page table walk + * termination. VADDR will not be clobbered, but REG2 will. + * + * There are two masks we must apply to propagate bits from + * the virtual address into the PTE physical address field + * when dealing with huge pages. This is because the page + * table boundaries do not match the huge page size(s) the + * hardware supports. + * + * In these cases we propagate the bits that are below the + * page table level where we saw the huge page mapping, but + * are still within the relevant physical bits for the huge + * page size in question. So for PMD mappings (which fall on + * bit 23, for 8MB per PMD) we must propagate bit 22 for a + * 4MB huge page. For huge PUDs (which fall on bit 33, for + * 8GB per PUD), we have to accomodate 256MB and 2GB huge + * pages. So for those we propagate bits 32 to 28. */ #define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL) \ sethi %hi(swapper_pg_dir), REG1; \ @@ -145,15 +160,40 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; andn REG2, 0x7, REG2; \ ldx [REG1 + REG2], REG1; \ brz,pn REG1, FAIL_LABEL; \ - sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ + sllx VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ andn REG2, 0x7, REG2; \ ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ brz,pn REG1, FAIL_LABEL; \ - sllx VADDR, 64 - PMD_SHIFT, REG2; \ + sethi %uhi(_PAGE_PUD_HUGE), REG2; \ + brz,pn REG1, FAIL_LABEL; \ + sllx REG2, 32, REG2; \ + andcc REG1, REG2, %g0; \ + sethi %hi(0xf8000000), REG2; \ + bne,pt %xcc, 697f; \ + sllx REG2, 1, REG2; \ + sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ andn REG2, 0x7, REG2; \ - add REG1, REG2, REG1; + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + sethi %uhi(_PAGE_PMD_HUGE), REG2; \ + brz,pn REG1, FAIL_LABEL; \ + sllx REG2, 32, REG2; \ + andcc REG1, REG2, %g0; \ + be,pn %xcc, 698f; \ + sethi %hi(0x400000), REG2; \ +697: brgez,pn REG1, FAIL_LABEL; \ + andn REG1, REG2, REG1; \ + and VADDR, REG2, REG2; \ + ba,pt %xcc, 699f; \ + or REG1, REG2, REG1; \ +698: sllx VADDR, 64 - PMD_SHIFT, REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + brgez,pn REG1, FAIL_LABEL; \ + nop; \ +699: /* PMD has been loaded into REG1, interpret the value, seeing * if it is a HUGE PMD or a normal one. If it is not valid @@ -198,6 +238,11 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; andn REG2, 0x7, REG2; \ ldxa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \ brz,pn REG1, FAIL_LABEL; \ + sllx VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + brz,pn REG1, FAIL_LABEL; \ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ andn REG2, 0x7, REG2; \ @@ -246,8 +291,6 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; (KERNEL_TSB_SIZE_BYTES / 16) #define KERNEL_TSB4M_NENTRIES 4096 -#define KTSB_PHYS_SHIFT 15 - /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL * on TSB hit. REG1, REG2, REG3, and REG4 are used as temporaries * and the found TTE will be left in REG1. REG3 and REG4 must @@ -256,17 +299,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; * VADDR and TAG will be preserved and not clobbered by this macro. */ #define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ -661: sethi %hi(swapper_tsb), REG1; \ - or REG1, %lo(swapper_tsb), REG1; \ +661: sethi %uhi(swapper_tsb), REG1; \ + sethi %hi(swapper_tsb), REG2; \ + or REG1, %ulo(swapper_tsb), REG1; \ + or REG2, %lo(swapper_tsb), REG2; \ .section .swapper_tsb_phys_patch, "ax"; \ .word 661b; \ .previous; \ -661: nop; \ - .section .tsb_ldquad_phys_patch, "ax"; \ - .word 661b; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - .previous; \ + sllx REG1, 32, REG1; \ + or REG1, REG2, REG1; \ srlx VADDR, PAGE_SHIFT, REG2; \ and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \ sllx REG2, 4, REG2; \ @@ -281,17 +322,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; * we can make use of that for the index computation. */ #define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ -661: sethi %hi(swapper_4m_tsb), REG1; \ - or REG1, %lo(swapper_4m_tsb), REG1; \ +661: sethi %uhi(swapper_4m_tsb), REG1; \ + sethi %hi(swapper_4m_tsb), REG2; \ + or REG1, %ulo(swapper_4m_tsb), REG1; \ + or REG2, %lo(swapper_4m_tsb), REG2; \ .section .swapper_4m_tsb_phys_patch, "ax"; \ .word 661b; \ .previous; \ -661: nop; \ - .section .tsb_ldquad_phys_patch, "ax"; \ - .word 661b; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - .previous; \ + sllx REG1, 32, REG1; \ + or REG1, REG2, REG1; \ and TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \ sllx REG2, 4, REG2; \ add REG1, REG2, REG2; \ diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h index 6b135a8ab07b..d758c8d8f47d 100644 --- a/arch/sparc/include/asm/vio.h +++ b/arch/sparc/include/asm/vio.h @@ -121,12 +121,18 @@ struct vio_disk_attr_info { u8 vdisk_type; #define VD_DISK_TYPE_SLICE 0x01 /* Slice in block device */ #define VD_DISK_TYPE_DISK 0x02 /* Entire block device */ - u16 resv1; + u8 vdisk_mtype; /* v1.1 */ +#define VD_MEDIA_TYPE_FIXED 0x01 /* Fixed device */ +#define VD_MEDIA_TYPE_CD 0x02 /* CD Device */ +#define VD_MEDIA_TYPE_DVD 0x03 /* DVD Device */ + u8 resv1; u32 vdisk_block_size; u64 operations; - u64 vdisk_size; + u64 vdisk_size; /* v1.1 */ u64 max_xfer_size; - u64 resv2[2]; + u32 phys_block_size; /* v1.2 */ + u32 resv2; + u64 resv3[1]; }; struct vio_disk_desc { @@ -272,7 +278,7 @@ static inline u32 vio_dring_avail(struct vio_dring_state *dr, unsigned int ring_size) { return (dr->pending - - ((dr->prod - dr->cons) & (ring_size - 1))); + ((dr->prod - dr->cons) & (ring_size - 1)) - 1); } #define VIO_MAX_TYPE_LEN 32 @@ -292,6 +298,7 @@ struct vio_dev { unsigned int tx_irq; unsigned int rx_irq; + u64 rx_ino; struct device dev; }; @@ -447,5 +454,6 @@ int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev, char *name); void vio_port_up(struct vio_driver_state *vio); +int vio_set_intr(unsigned long dev_ino, int state); #endif /* _SPARC64_VIO_H */ diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 82a3a71c451e..dfad8b1aea9f 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -494,6 +494,18 @@ static void __init sun4v_cpu_probe(void) sparc_pmu_type = "niagara5"; break; + case SUN4V_CHIP_SPARC_M6: + sparc_cpu_type = "SPARC-M6"; + sparc_fpu_type = "SPARC-M6 integrated FPU"; + sparc_pmu_type = "sparc-m6"; + break; + + case SUN4V_CHIP_SPARC_M7: + sparc_cpu_type = "SPARC-M7"; + sparc_fpu_type = "SPARC-M7 integrated FPU"; + sparc_pmu_type = "sparc-m7"; + break; + case SUN4V_CHIP_SPARC64X: sparc_cpu_type = "SPARC64-X"; sparc_fpu_type = "SPARC64-X integrated FPU"; diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c index de1c844dfabc..e69ec0e3f155 100644 --- a/arch/sparc/kernel/cpumap.c +++ b/arch/sparc/kernel/cpumap.c @@ -326,6 +326,8 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index) case SUN4V_CHIP_NIAGARA3: case SUN4V_CHIP_NIAGARA4: case SUN4V_CHIP_NIAGARA5: + case SUN4V_CHIP_SPARC_M6: + case SUN4V_CHIP_SPARC_M7: case SUN4V_CHIP_SPARC64X: rover_inc_table = niagara_iterate_method; break; diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index dff60abbea01..f87a55d77094 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -1200,14 +1200,14 @@ static int ds_probe(struct vio_dev *vdev, const struct vio_device_id *id) ds_cfg.tx_irq = vdev->tx_irq; ds_cfg.rx_irq = vdev->rx_irq; - lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp); + lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp, "DS"); if (IS_ERR(lp)) { err = PTR_ERR(lp); goto out_free_ds_states; } dp->lp = lp; - err = ldc_bind(lp, "DS"); + err = ldc_bind(lp); if (err) goto out_free_ldc; diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 452f04fe8da6..4fdeb8040d4d 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -427,6 +427,12 @@ sun4v_chip_type: cmp %g2, '5' be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA5, %g4 + cmp %g2, '6' + be,pt %xcc, 5f + mov SUN4V_CHIP_SPARC_M6, %g4 + cmp %g2, '7' + be,pt %xcc, 5f + mov SUN4V_CHIP_SPARC_M7, %g4 ba,pt %xcc, 49f nop @@ -585,6 +591,12 @@ niagara_tlb_fixup: cmp %g1, SUN4V_CHIP_NIAGARA5 be,pt %xcc, niagara4_patch nop + cmp %g1, SUN4V_CHIP_SPARC_M6 + be,pt %xcc, niagara4_patch + nop + cmp %g1, SUN4V_CHIP_SPARC_M7 + be,pt %xcc, niagara4_patch + nop call generic_patch_copyops nop diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index c0a2de0fd624..5c55145bfbf0 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -46,6 +46,7 @@ static struct api_info api_table[] = { { .group = HV_GRP_VF_CPU, }, { .group = HV_GRP_KT_CPU, }, { .group = HV_GRP_VT_CPU, }, + { .group = HV_GRP_T5_CPU, }, { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, }; diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index f3ab509b76a8..caedf8320416 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S @@ -821,3 +821,19 @@ ENTRY(sun4v_vt_set_perfreg) retl nop ENDPROC(sun4v_vt_set_perfreg) + +ENTRY(sun4v_t5_get_perfreg) + mov %o1, %o4 + mov HV_FAST_T5_GET_PERFREG, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_t5_get_perfreg) + +ENTRY(sun4v_t5_set_perfreg) + mov HV_FAST_T5_SET_PERFREG, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_t5_set_perfreg) diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 7f08ec8a7c68..28fed53b13a0 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -278,7 +278,8 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len, } order = get_order(len_total); - if ((va = __get_free_pages(GFP_KERNEL|__GFP_COMP, order)) == 0) + va = __get_free_pages(gfp, order); + if (va == 0) goto err_nopages; if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) @@ -443,7 +444,7 @@ static void *pci32_alloc_coherent(struct device *dev, size_t len, } order = get_order(len_total); - va = (void *) __get_free_pages(GFP_KERNEL, order); + va = (void *) __get_free_pages(gfp, order); if (va == NULL) { printk("pci_alloc_consistent: no %ld pages\n", len_total>>PAGE_SHIFT); goto err_nopages; diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 666193f4e8bb..4033c23bdfa6 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -47,8 +47,6 @@ #include "cpumap.h" #include "kstack.h" -#define NUM_IVECS (IMAP_INR + 1) - struct ino_bucket *ivector_table; unsigned long ivector_table_pa; @@ -107,55 +105,196 @@ static void bucket_set_irq(unsigned long bucket_pa, unsigned int irq) #define irq_work_pa(__cpu) &(trap_block[(__cpu)].irq_worklist_pa) -static struct { - unsigned int dev_handle; - unsigned int dev_ino; - unsigned int in_use; -} irq_table[NR_IRQS]; -static DEFINE_SPINLOCK(irq_alloc_lock); +static unsigned long hvirq_major __initdata; +static int __init early_hvirq_major(char *p) +{ + int rc = kstrtoul(p, 10, &hvirq_major); + + return rc; +} +early_param("hvirq", early_hvirq_major); + +static int hv_irq_version; + +/* Major version 2.0 of HV_GRP_INTR added support for the VIRQ cookie + * based interfaces, but: + * + * 1) Several OSs, Solaris and Linux included, use them even when only + * negotiating version 1.0 (or failing to negotiate at all). So the + * hypervisor has a workaround that provides the VIRQ interfaces even + * when only verion 1.0 of the API is in use. + * + * 2) Second, and more importantly, with major version 2.0 these VIRQ + * interfaces only were actually hooked up for LDC interrupts, even + * though the Hypervisor specification clearly stated: + * + * The new interrupt API functions will be available to a guest + * when it negotiates version 2.0 in the interrupt API group 0x2. When + * a guest negotiates version 2.0, all interrupt sources will only + * support using the cookie interface, and any attempt to use the + * version 1.0 interrupt APIs numbered 0xa0 to 0xa6 will result in the + * ENOTSUPPORTED error being returned. + * + * with an emphasis on "all interrupt sources". + * + * To correct this, major version 3.0 was created which does actually + * support VIRQs for all interrupt sources (not just LDC devices). So + * if we want to move completely over the cookie based VIRQs we must + * negotiate major version 3.0 or later of HV_GRP_INTR. + */ +static bool sun4v_cookie_only_virqs(void) +{ + if (hv_irq_version >= 3) + return true; + return false; +} -unsigned char irq_alloc(unsigned int dev_handle, unsigned int dev_ino) +static void __init irq_init_hv(void) { - unsigned long flags; - unsigned char ent; + unsigned long hv_error, major, minor = 0; + + if (tlb_type != hypervisor) + return; - BUILD_BUG_ON(NR_IRQS >= 256); + if (hvirq_major) + major = hvirq_major; + else + major = 3; - spin_lock_irqsave(&irq_alloc_lock, flags); + hv_error = sun4v_hvapi_register(HV_GRP_INTR, major, &minor); + if (!hv_error) + hv_irq_version = major; + else + hv_irq_version = 1; - for (ent = 1; ent < NR_IRQS; ent++) { - if (!irq_table[ent].in_use) + pr_info("SUN4V: Using IRQ API major %d, cookie only virqs %s\n", + hv_irq_version, + sun4v_cookie_only_virqs() ? "enabled" : "disabled"); +} + +/* This function is for the timer interrupt.*/ +int __init arch_probe_nr_irqs(void) +{ + return 1; +} + +#define DEFAULT_NUM_IVECS (0xfffU) +static unsigned int nr_ivec = DEFAULT_NUM_IVECS; +#define NUM_IVECS (nr_ivec) + +static unsigned int __init size_nr_ivec(void) +{ + if (tlb_type == hypervisor) { + switch (sun4v_chip_type) { + /* Athena's devhandle|devino is large.*/ + case SUN4V_CHIP_SPARC64X: + nr_ivec = 0xffff; break; + } } - if (ent >= NR_IRQS) { - printk(KERN_ERR "IRQ: Out of virtual IRQs.\n"); - ent = 0; - } else { - irq_table[ent].dev_handle = dev_handle; - irq_table[ent].dev_ino = dev_ino; - irq_table[ent].in_use = 1; - } + return nr_ivec; +} + +struct irq_handler_data { + union { + struct { + unsigned int dev_handle; + unsigned int dev_ino; + }; + unsigned long sysino; + }; + struct ino_bucket bucket; + unsigned long iclr; + unsigned long imap; +}; + +static inline unsigned int irq_data_to_handle(struct irq_data *data) +{ + struct irq_handler_data *ihd = data->handler_data; + + return ihd->dev_handle; +} + +static inline unsigned int irq_data_to_ino(struct irq_data *data) +{ + struct irq_handler_data *ihd = data->handler_data; - spin_unlock_irqrestore(&irq_alloc_lock, flags); + return ihd->dev_ino; +} + +static inline unsigned long irq_data_to_sysino(struct irq_data *data) +{ + struct irq_handler_data *ihd = data->handler_data; - return ent; + return ihd->sysino; } -#ifdef CONFIG_PCI_MSI void irq_free(unsigned int irq) { - unsigned long flags; + void *data = irq_get_handler_data(irq); - if (irq >= NR_IRQS) - return; + kfree(data); + irq_set_handler_data(irq, NULL); + irq_free_descs(irq, 1); +} - spin_lock_irqsave(&irq_alloc_lock, flags); +unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino) +{ + int irq; - irq_table[irq].in_use = 0; + irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL); + if (irq <= 0) + goto out; - spin_unlock_irqrestore(&irq_alloc_lock, flags); + return irq; +out: + return 0; +} + +static unsigned int cookie_exists(u32 devhandle, unsigned int devino) +{ + unsigned long hv_err, cookie; + struct ino_bucket *bucket; + unsigned int irq = 0U; + + hv_err = sun4v_vintr_get_cookie(devhandle, devino, &cookie); + if (hv_err) { + pr_err("HV get cookie failed hv_err = %ld\n", hv_err); + goto out; + } + + if (cookie & ((1UL << 63UL))) { + cookie = ~cookie; + bucket = (struct ino_bucket *) __va(cookie); + irq = bucket->__irq; + } +out: + return irq; +} + +static unsigned int sysino_exists(u32 devhandle, unsigned int devino) +{ + unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino); + struct ino_bucket *bucket; + unsigned int irq; + + bucket = &ivector_table[sysino]; + irq = bucket_get_irq(__pa(bucket)); + + return irq; +} + +void ack_bad_irq(unsigned int irq) +{ + pr_crit("BAD IRQ ack %d\n", irq); +} + +void irq_install_pre_handler(int irq, + void (*func)(unsigned int, void *, void *), + void *arg1, void *arg2) +{ + pr_warn("IRQ pre handler NOT supported.\n"); } -#endif /* * /proc/interrupts printing: @@ -206,15 +345,6 @@ static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid) return tid; } -struct irq_handler_data { - unsigned long iclr; - unsigned long imap; - - void (*pre_handler)(unsigned int, void *, void *); - void *arg1; - void *arg2; -}; - #ifdef CONFIG_SMP static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity) { @@ -316,8 +446,8 @@ static void sun4u_irq_eoi(struct irq_data *data) static void sun4v_irq_enable(struct irq_data *data) { - unsigned int ino = irq_table[data->irq].dev_ino; unsigned long cpuid = irq_choose_cpu(data->irq, data->affinity); + unsigned int ino = irq_data_to_sysino(data); int err; err = sun4v_intr_settarget(ino, cpuid); @@ -337,8 +467,8 @@ static void sun4v_irq_enable(struct irq_data *data) static int sun4v_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - unsigned int ino = irq_table[data->irq].dev_ino; unsigned long cpuid = irq_choose_cpu(data->irq, mask); + unsigned int ino = irq_data_to_sysino(data); int err; err = sun4v_intr_settarget(ino, cpuid); @@ -351,7 +481,7 @@ static int sun4v_set_affinity(struct irq_data *data, static void sun4v_irq_disable(struct irq_data *data) { - unsigned int ino = irq_table[data->irq].dev_ino; + unsigned int ino = irq_data_to_sysino(data); int err; err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED); @@ -362,7 +492,7 @@ static void sun4v_irq_disable(struct irq_data *data) static void sun4v_irq_eoi(struct irq_data *data) { - unsigned int ino = irq_table[data->irq].dev_ino; + unsigned int ino = irq_data_to_sysino(data); int err; err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE); @@ -373,14 +503,13 @@ static void sun4v_irq_eoi(struct irq_data *data) static void sun4v_virq_enable(struct irq_data *data) { - unsigned long cpuid, dev_handle, dev_ino; + unsigned long dev_handle = irq_data_to_handle(data); + unsigned long dev_ino = irq_data_to_ino(data); + unsigned long cpuid; int err; cpuid = irq_choose_cpu(data->irq, data->affinity); - dev_handle = irq_table[data->irq].dev_handle; - dev_ino = irq_table[data->irq].dev_ino; - err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid); if (err != HV_EOK) printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " @@ -403,14 +532,13 @@ static void sun4v_virq_enable(struct irq_data *data) static int sun4v_virt_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - unsigned long cpuid, dev_handle, dev_ino; + unsigned long dev_handle = irq_data_to_handle(data); + unsigned long dev_ino = irq_data_to_ino(data); + unsigned long cpuid; int err; cpuid = irq_choose_cpu(data->irq, mask); - dev_handle = irq_table[data->irq].dev_handle; - dev_ino = irq_table[data->irq].dev_ino; - err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid); if (err != HV_EOK) printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " @@ -422,11 +550,10 @@ static int sun4v_virt_set_affinity(struct irq_data *data, static void sun4v_virq_disable(struct irq_data *data) { - unsigned long dev_handle, dev_ino; + unsigned long dev_handle = irq_data_to_handle(data); + unsigned long dev_ino = irq_data_to_ino(data); int err; - dev_handle = irq_table[data->irq].dev_handle; - dev_ino = irq_table[data->irq].dev_ino; err = sun4v_vintr_set_valid(dev_handle, dev_ino, HV_INTR_DISABLED); @@ -438,12 +565,10 @@ static void sun4v_virq_disable(struct irq_data *data) static void sun4v_virq_eoi(struct irq_data *data) { - unsigned long dev_handle, dev_ino; + unsigned long dev_handle = irq_data_to_handle(data); + unsigned long dev_ino = irq_data_to_ino(data); int err; - dev_handle = irq_table[data->irq].dev_handle; - dev_ino = irq_table[data->irq].dev_ino; - err = sun4v_vintr_set_state(dev_handle, dev_ino, HV_INTR_STATE_IDLE); if (err != HV_EOK) @@ -479,31 +604,10 @@ static struct irq_chip sun4v_virq = { .flags = IRQCHIP_EOI_IF_HANDLED, }; -static void pre_flow_handler(struct irq_data *d) -{ - struct irq_handler_data *handler_data = irq_data_get_irq_handler_data(d); - unsigned int ino = irq_table[d->irq].dev_ino; - - handler_data->pre_handler(ino, handler_data->arg1, handler_data->arg2); -} - -void irq_install_pre_handler(int irq, - void (*func)(unsigned int, void *, void *), - void *arg1, void *arg2) -{ - struct irq_handler_data *handler_data = irq_get_handler_data(irq); - - handler_data->pre_handler = func; - handler_data->arg1 = arg1; - handler_data->arg2 = arg2; - - __irq_set_preflow_handler(irq, pre_flow_handler); -} - unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap) { - struct ino_bucket *bucket; struct irq_handler_data *handler_data; + struct ino_bucket *bucket; unsigned int irq; int ino; @@ -537,119 +641,166 @@ out: return irq; } -static unsigned int sun4v_build_common(unsigned long sysino, - struct irq_chip *chip) +static unsigned int sun4v_build_common(u32 devhandle, unsigned int devino, + void (*handler_data_init)(struct irq_handler_data *data, + u32 devhandle, unsigned int devino), + struct irq_chip *chip) { - struct ino_bucket *bucket; - struct irq_handler_data *handler_data; + struct irq_handler_data *data; unsigned int irq; - BUG_ON(tlb_type != hypervisor); + irq = irq_alloc(devhandle, devino); + if (!irq) + goto out; - bucket = &ivector_table[sysino]; - irq = bucket_get_irq(__pa(bucket)); - if (!irq) { - irq = irq_alloc(0, sysino); - bucket_set_irq(__pa(bucket), irq); - irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq, - "IVEC"); + data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); + if (unlikely(!data)) { + pr_err("IRQ handler data allocation failed.\n"); + irq_free(irq); + irq = 0; + goto out; } - handler_data = irq_get_handler_data(irq); - if (unlikely(handler_data)) - goto out; + irq_set_handler_data(irq, data); + handler_data_init(data, devhandle, devino); + irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq, "IVEC"); + data->imap = ~0UL; + data->iclr = ~0UL; +out: + return irq; +} - handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); - if (unlikely(!handler_data)) { - prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n"); - prom_halt(); - } - irq_set_handler_data(irq, handler_data); +static unsigned long cookie_assign(unsigned int irq, u32 devhandle, + unsigned int devino) +{ + struct irq_handler_data *ihd = irq_get_handler_data(irq); + unsigned long hv_error, cookie; - /* Catch accidental accesses to these things. IMAP/ICLR handling - * is done by hypervisor calls on sun4v platforms, not by direct - * register accesses. + /* handler_irq needs to find the irq. cookie is seen signed in + * sun4v_dev_mondo and treated as a non ivector_table delivery. */ - handler_data->imap = ~0UL; - handler_data->iclr = ~0UL; + ihd->bucket.__irq = irq; + cookie = ~__pa(&ihd->bucket); -out: - return irq; + hv_error = sun4v_vintr_set_cookie(devhandle, devino, cookie); + if (hv_error) + pr_err("HV vintr set cookie failed = %ld\n", hv_error); + + return hv_error; } -unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino) +static void cookie_handler_data(struct irq_handler_data *data, + u32 devhandle, unsigned int devino) { - unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino); + data->dev_handle = devhandle; + data->dev_ino = devino; +} - return sun4v_build_common(sysino, &sun4v_irq); +static unsigned int cookie_build_irq(u32 devhandle, unsigned int devino, + struct irq_chip *chip) +{ + unsigned long hv_error; + unsigned int irq; + + irq = sun4v_build_common(devhandle, devino, cookie_handler_data, chip); + + hv_error = cookie_assign(irq, devhandle, devino); + if (hv_error) { + irq_free(irq); + irq = 0; + } + + return irq; } -unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino) +static unsigned int sun4v_build_cookie(u32 devhandle, unsigned int devino) { - struct irq_handler_data *handler_data; - unsigned long hv_err, cookie; - struct ino_bucket *bucket; unsigned int irq; - bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC); - if (unlikely(!bucket)) - return 0; + irq = cookie_exists(devhandle, devino); + if (irq) + goto out; - /* The only reference we store to the IRQ bucket is - * by physical address which kmemleak can't see, tell - * it that this object explicitly is not a leak and - * should be scanned. - */ - kmemleak_not_leak(bucket); + irq = cookie_build_irq(devhandle, devino, &sun4v_virq); - __flush_dcache_range((unsigned long) bucket, - ((unsigned long) bucket + - sizeof(struct ino_bucket))); +out: + return irq; +} - irq = irq_alloc(devhandle, devino); +static void sysino_set_bucket(unsigned int irq) +{ + struct irq_handler_data *ihd = irq_get_handler_data(irq); + struct ino_bucket *bucket; + unsigned long sysino; + + sysino = sun4v_devino_to_sysino(ihd->dev_handle, ihd->dev_ino); + BUG_ON(sysino >= nr_ivec); + bucket = &ivector_table[sysino]; bucket_set_irq(__pa(bucket), irq); +} - irq_set_chip_and_handler_name(irq, &sun4v_virq, handle_fasteoi_irq, - "IVEC"); +static void sysino_handler_data(struct irq_handler_data *data, + u32 devhandle, unsigned int devino) +{ + unsigned long sysino; - handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); - if (unlikely(!handler_data)) - return 0; + sysino = sun4v_devino_to_sysino(devhandle, devino); + data->sysino = sysino; +} - /* In order to make the LDC channel startup sequence easier, - * especially wrt. locking, we do not let request_irq() enable - * the interrupt. - */ - irq_set_status_flags(irq, IRQ_NOAUTOEN); - irq_set_handler_data(irq, handler_data); +static unsigned int sysino_build_irq(u32 devhandle, unsigned int devino, + struct irq_chip *chip) +{ + unsigned int irq; - /* Catch accidental accesses to these things. IMAP/ICLR handling - * is done by hypervisor calls on sun4v platforms, not by direct - * register accesses. - */ - handler_data->imap = ~0UL; - handler_data->iclr = ~0UL; + irq = sun4v_build_common(devhandle, devino, sysino_handler_data, chip); + if (!irq) + goto out; - cookie = ~__pa(bucket); - hv_err = sun4v_vintr_set_cookie(devhandle, devino, cookie); - if (hv_err) { - prom_printf("IRQ: Fatal, cannot set cookie for [%x:%x] " - "err=%lu\n", devhandle, devino, hv_err); - prom_halt(); - } + sysino_set_bucket(irq); +out: + return irq; +} +static int sun4v_build_sysino(u32 devhandle, unsigned int devino) +{ + int irq; + + irq = sysino_exists(devhandle, devino); + if (irq) + goto out; + + irq = sysino_build_irq(devhandle, devino, &sun4v_irq); +out: return irq; } -void ack_bad_irq(unsigned int irq) +unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino) { - unsigned int ino = irq_table[irq].dev_ino; + unsigned int irq; - if (!ino) - ino = 0xdeadbeef; + if (sun4v_cookie_only_virqs()) + irq = sun4v_build_cookie(devhandle, devino); + else + irq = sun4v_build_sysino(devhandle, devino); - printk(KERN_CRIT "Unexpected IRQ from ino[%x] irq[%u]\n", - ino, irq); + return irq; +} + +unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino) +{ + int irq; + + irq = cookie_build_irq(devhandle, devino, &sun4v_virq); + if (!irq) + goto out; + + /* This is borrowed from the original function. + */ + irq_set_status_flags(irq, IRQ_NOAUTOEN); + +out: + return irq; } void *hardirq_stack[NR_CPUS]; @@ -720,9 +871,12 @@ void fixup_irqs(void) for (irq = 0; irq < NR_IRQS; irq++) { struct irq_desc *desc = irq_to_desc(irq); - struct irq_data *data = irq_desc_get_irq_data(desc); + struct irq_data *data; unsigned long flags; + if (!desc) + continue; + data = irq_desc_get_irq_data(desc); raw_spin_lock_irqsave(&desc->lock, flags); if (desc->action && !irqd_is_per_cpu(data)) { if (data->chip->irq_set_affinity) @@ -922,16 +1076,22 @@ static struct irqaction timer_irq_action = { .name = "timer", }; -/* Only invoked on boot processor. */ -void __init init_IRQ(void) +static void __init irq_ivector_init(void) { - unsigned long size; + unsigned long size, order; + unsigned int ivecs; - map_prom_timers(); - kill_prom_timer(); + /* If we are doing cookie only VIRQs then we do not need the ivector + * table to process interrupts. + */ + if (sun4v_cookie_only_virqs()) + return; - size = sizeof(struct ino_bucket) * NUM_IVECS; - ivector_table = kzalloc(size, GFP_KERNEL); + ivecs = size_nr_ivec(); + size = sizeof(struct ino_bucket) * ivecs; + order = get_order(size); + ivector_table = (struct ino_bucket *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); if (!ivector_table) { prom_printf("Fatal error, cannot allocate ivector_table\n"); prom_halt(); @@ -940,6 +1100,15 @@ void __init init_IRQ(void) ((unsigned long) ivector_table) + size); ivector_table_pa = __pa(ivector_table); +} + +/* Only invoked on boot processor.*/ +void __init init_IRQ(void) +{ + irq_init_hv(); + irq_ivector_init(); + map_prom_timers(); + kill_prom_timer(); if (tlb_type == hypervisor) sun4v_init_mondo_queues(); diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S index 605d49204580..ef0d8e9e1210 100644 --- a/arch/sparc/kernel/ktlb.S +++ b/arch/sparc/kernel/ktlb.S @@ -47,14 +47,6 @@ kvmap_itlb_vmalloc_addr: KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath) TSB_LOCK_TAG(%g1, %g2, %g7) - - /* Load and check PTE. */ - ldxa [%g5] ASI_PHYS_USE_EC, %g5 - mov 1, %g7 - sllx %g7, TSB_TAG_INVALID_BIT, %g7 - brgez,a,pn %g5, kvmap_itlb_longpath - TSB_STORE(%g1, %g7) - TSB_WRITE(%g1, %g5, %g6) /* fallthrough to TLB load */ @@ -118,6 +110,12 @@ kvmap_dtlb_obp: ba,pt %xcc, kvmap_dtlb_load nop +kvmap_linear_early: + sethi %hi(kern_linear_pte_xor), %g7 + ldx [%g7 + %lo(kern_linear_pte_xor)], %g2 + ba,pt %xcc, kvmap_dtlb_tsb4m_load + xor %g2, %g4, %g5 + .align 32 kvmap_dtlb_tsb4m_load: TSB_LOCK_TAG(%g1, %g2, %g7) @@ -146,105 +144,17 @@ kvmap_dtlb_4v: /* Correct TAG_TARGET is already in %g6, check 4mb TSB. */ KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load) #endif - /* TSB entry address left in %g1, lookup linear PTE. - * Must preserve %g1 and %g6 (TAG). - */ -kvmap_dtlb_tsb4m_miss: - /* Clear the PAGE_OFFSET top virtual bits, shift - * down to get PFN, and make sure PFN is in range. - */ -661: sllx %g4, 0, %g5 - .section .page_offset_shift_patch, "ax" - .word 661b - .previous - - /* Check to see if we know about valid memory at the 4MB - * chunk this physical address will reside within. + /* Linear mapping TSB lookup failed. Fallthrough to kernel + * page table based lookup. */ -661: srlx %g5, MAX_PHYS_ADDRESS_BITS, %g2 - .section .page_offset_shift_patch, "ax" - .word 661b - .previous - - brnz,pn %g2, kvmap_dtlb_longpath - nop - - /* This unconditional branch and delay-slot nop gets patched - * by the sethi sequence once the bitmap is properly setup. - */ - .globl valid_addr_bitmap_insn -valid_addr_bitmap_insn: - ba,pt %xcc, 2f - nop - .subsection 2 - .globl valid_addr_bitmap_patch -valid_addr_bitmap_patch: - sethi %hi(sparc64_valid_addr_bitmap), %g7 - or %g7, %lo(sparc64_valid_addr_bitmap), %g7 - .previous - -661: srlx %g5, ILOG2_4MB, %g2 - .section .page_offset_shift_patch, "ax" - .word 661b - .previous - - srlx %g2, 6, %g5 - and %g2, 63, %g2 - sllx %g5, 3, %g5 - ldx [%g7 + %g5], %g5 - mov 1, %g7 - sllx %g7, %g2, %g7 - andcc %g5, %g7, %g0 - be,pn %xcc, kvmap_dtlb_longpath - -2: sethi %hi(kpte_linear_bitmap), %g2 - - /* Get the 256MB physical address index. */ -661: sllx %g4, 0, %g5 - .section .page_offset_shift_patch, "ax" - .word 661b - .previous - - or %g2, %lo(kpte_linear_bitmap), %g2 - -661: srlx %g5, ILOG2_256MB, %g5 - .section .page_offset_shift_patch, "ax" - .word 661b - .previous - - and %g5, (32 - 1), %g7 - - /* Divide by 32 to get the offset into the bitmask. */ - srlx %g5, 5, %g5 - add %g7, %g7, %g7 - sllx %g5, 3, %g5 - - /* kern_linear_pte_xor[(mask >> shift) & 3)] */ - ldx [%g2 + %g5], %g2 - srlx %g2, %g7, %g7 - sethi %hi(kern_linear_pte_xor), %g5 - and %g7, 3, %g7 - or %g5, %lo(kern_linear_pte_xor), %g5 - sllx %g7, 3, %g7 - ldx [%g5 + %g7], %g2 - .globl kvmap_linear_patch kvmap_linear_patch: - ba,pt %xcc, kvmap_dtlb_tsb4m_load - xor %g2, %g4, %g5 + ba,a,pt %xcc, kvmap_linear_early kvmap_dtlb_vmalloc_addr: KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) TSB_LOCK_TAG(%g1, %g2, %g7) - - /* Load and check PTE. */ - ldxa [%g5] ASI_PHYS_USE_EC, %g5 - mov 1, %g7 - sllx %g7, TSB_TAG_INVALID_BIT, %g7 - brgez,a,pn %g5, kvmap_dtlb_longpath - TSB_STORE(%g1, %g7) - TSB_WRITE(%g1, %g5, %g6) /* fallthrough to TLB load */ @@ -276,13 +186,8 @@ kvmap_dtlb_load: #ifdef CONFIG_SPARSEMEM_VMEMMAP kvmap_vmemmap: - sub %g4, %g5, %g5 - srlx %g5, ILOG2_4MB, %g5 - sethi %hi(vmemmap_table), %g1 - sllx %g5, 3, %g5 - or %g1, %lo(vmemmap_table), %g1 - ba,pt %xcc, kvmap_dtlb_load - ldx [%g1 + %g5], %g5 + KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) + ba,a,pt %xcc, kvmap_dtlb_load #endif kvmap_dtlb_nonlinear: @@ -294,8 +199,8 @@ kvmap_dtlb_nonlinear: #ifdef CONFIG_SPARSEMEM_VMEMMAP /* Do not use the TSB for vmemmap. */ - mov (VMEMMAP_BASE >> 40), %g5 - sllx %g5, 40, %g5 + sethi %hi(VMEMMAP_BASE), %g5 + ldx [%g5 + %lo(VMEMMAP_BASE)], %g5 cmp %g4,%g5 bgeu,pn %xcc, kvmap_vmemmap nop @@ -307,8 +212,8 @@ kvmap_dtlb_tsbmiss: sethi %hi(MODULES_VADDR), %g5 cmp %g4, %g5 blu,pn %xcc, kvmap_dtlb_longpath - mov (VMALLOC_END >> 40), %g5 - sllx %g5, 40, %g5 + sethi %hi(VMALLOC_END), %g5 + ldx [%g5 + %lo(VMALLOC_END)], %g5 cmp %g4, %g5 bgeu,pn %xcc, kvmap_dtlb_longpath nop diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 0af28b984695..4310332872d4 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -1078,7 +1078,8 @@ static void ldc_iommu_release(struct ldc_channel *lp) struct ldc_channel *ldc_alloc(unsigned long id, const struct ldc_channel_config *cfgp, - void *event_arg) + void *event_arg, + const char *name) { struct ldc_channel *lp; const struct ldc_mode_ops *mops; @@ -1093,6 +1094,8 @@ struct ldc_channel *ldc_alloc(unsigned long id, err = -EINVAL; if (!cfgp) goto out_err; + if (!name) + goto out_err; switch (cfgp->mode) { case LDC_MODE_RAW: @@ -1185,6 +1188,21 @@ struct ldc_channel *ldc_alloc(unsigned long id, INIT_HLIST_HEAD(&lp->mh_list); + snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name); + snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); + + err = request_irq(lp->cfg.rx_irq, ldc_rx, 0, + lp->rx_irq_name, lp); + if (err) + goto out_free_txq; + + err = request_irq(lp->cfg.tx_irq, ldc_tx, 0, + lp->tx_irq_name, lp); + if (err) { + free_irq(lp->cfg.rx_irq, lp); + goto out_free_txq; + } + return lp; out_free_txq: @@ -1237,31 +1255,14 @@ EXPORT_SYMBOL(ldc_free); * state. This does not initiate a handshake, ldc_connect() does * that. */ -int ldc_bind(struct ldc_channel *lp, const char *name) +int ldc_bind(struct ldc_channel *lp) { unsigned long hv_err, flags; int err = -EINVAL; - if (!name || - (lp->state != LDC_STATE_INIT)) + if (lp->state != LDC_STATE_INIT) return -EINVAL; - snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name); - snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); - - err = request_irq(lp->cfg.rx_irq, ldc_rx, 0, - lp->rx_irq_name, lp); - if (err) - return err; - - err = request_irq(lp->cfg.tx_irq, ldc_tx, 0, - lp->tx_irq_name, lp); - if (err) { - free_irq(lp->cfg.rx_irq, lp); - return err; - } - - spin_lock_irqsave(&lp->lock, flags); enable_irq(lp->cfg.rx_irq); diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c index 683c4af999de..9bbb8f2bbfcc 100644 --- a/arch/sparc/kernel/leon_kernel.c +++ b/arch/sparc/kernel/leon_kernel.c @@ -37,6 +37,7 @@ unsigned long amba_system_id; static DEFINE_SPINLOCK(leon_irq_lock); static unsigned long leon3_gptimer_idx; /* Timer Index (0..6) within Timer Core */ +static unsigned long leon3_gptimer_ackmask; /* For clearing pending bit */ unsigned long leon3_gptimer_irq; /* interrupt controller irq number */ unsigned int sparc_leon_eirq; #define LEON_IMASK(cpu) (&leon3_irqctrl_regs->mask[cpu]) @@ -260,11 +261,19 @@ void leon_update_virq_handling(unsigned int virq, static u32 leon_cycles_offset(void) { - u32 rld, val, off; + u32 rld, val, ctrl, off; + rld = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].rld); val = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val); - off = rld - val; - return rld - val; + ctrl = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl); + if (LEON3_GPTIMER_CTRL_ISPENDING(ctrl)) { + val = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val); + off = 2 * rld - val; + } else { + off = rld - val; + } + + return off; } #ifdef CONFIG_SMP @@ -302,6 +311,7 @@ void __init leon_init_timers(void) int ampopts; int err; u32 config; + u32 ctrl; sparc_config.get_cycles_offset = leon_cycles_offset; sparc_config.cs_period = 1000000 / HZ; @@ -374,6 +384,16 @@ void __init leon_init_timers(void) if (!(leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq)) goto bad; + ctrl = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, + ctrl | LEON3_GPTIMER_CTRL_PENDING); + ctrl = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl); + + if ((ctrl & LEON3_GPTIMER_CTRL_PENDING) != 0) + leon3_gptimer_ackmask = ~LEON3_GPTIMER_CTRL_PENDING; + else + leon3_gptimer_ackmask = ~0; + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0); LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].rld, (((1000000 / HZ) - 1))); @@ -452,6 +472,11 @@ bad: static void leon_clear_clock_irq(void) { + u32 ctrl; + + ctrl = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, + ctrl & leon3_gptimer_ackmask); } static void leon_load_profile_irq(int cpu, unsigned int limit) diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 269af58497aa..7e967c8018c8 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -191,12 +191,41 @@ static const struct pcr_ops n4_pcr_ops = { .pcr_nmi_disable = PCR_N4_PICNPT, }; +static u64 n5_pcr_read(unsigned long reg_num) +{ + unsigned long val; + + (void) sun4v_t5_get_perfreg(reg_num, &val); + + return val; +} + +static void n5_pcr_write(unsigned long reg_num, u64 val) +{ + (void) sun4v_t5_set_perfreg(reg_num, val); +} + +static const struct pcr_ops n5_pcr_ops = { + .read_pcr = n5_pcr_read, + .write_pcr = n5_pcr_write, + .read_pic = n4_pic_read, + .write_pic = n4_pic_write, + .nmi_picl_value = n4_picl_value, + .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE | + PCR_N4_UTRACE | PCR_N4_TOE | + (26 << PCR_N4_SL_SHIFT)), + .pcr_nmi_disable = PCR_N4_PICNPT, +}; + + static unsigned long perf_hsvc_group; static unsigned long perf_hsvc_major; static unsigned long perf_hsvc_minor; static int __init register_perf_hsvc(void) { + unsigned long hverror; + if (tlb_type == hypervisor) { switch (sun4v_chip_type) { case SUN4V_CHIP_NIAGARA1: @@ -215,6 +244,10 @@ static int __init register_perf_hsvc(void) perf_hsvc_group = HV_GRP_VT_CPU; break; + case SUN4V_CHIP_NIAGARA5: + perf_hsvc_group = HV_GRP_T5_CPU; + break; + default: return -ENODEV; } @@ -222,10 +255,12 @@ static int __init register_perf_hsvc(void) perf_hsvc_major = 1; perf_hsvc_minor = 0; - if (sun4v_hvapi_register(perf_hsvc_group, - perf_hsvc_major, - &perf_hsvc_minor)) { - printk("perfmon: Could not register hvapi.\n"); + hverror = sun4v_hvapi_register(perf_hsvc_group, + perf_hsvc_major, + &perf_hsvc_minor); + if (hverror) { + pr_err("perfmon: Could not register hvapi(0x%lx).\n", + hverror); return -ENODEV; } } @@ -254,6 +289,10 @@ static int __init setup_sun4v_pcr_ops(void) pcr_ops = &n4_pcr_ops; break; + case SUN4V_CHIP_NIAGARA5: + pcr_ops = &n5_pcr_ops; + break; + default: ret = -ENODEV; break; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index d35c490a91cb..c9759ad3f34a 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1662,7 +1662,8 @@ static bool __init supported_pmu(void) sparc_pmu = &niagara2_pmu; return true; } - if (!strcmp(sparc_pmu_type, "niagara4")) { + if (!strcmp(sparc_pmu_type, "niagara4") || + !strcmp(sparc_pmu_type, "niagara5")) { sparc_pmu = &niagara4_pmu; return true; } diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 3fdb455e3318..e629b8377587 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -141,21 +141,9 @@ static void __init boot_flags_init(char *commands) process_switch(*commands++); continue; } - if (!strncmp(commands, "mem=", 4)) { - /* - * "mem=XXX[kKmM]" overrides the PROM-reported - * memory size. - */ - cmdline_memory_size = simple_strtoul(commands + 4, - &commands, 0); - if (*commands == 'K' || *commands == 'k') { - cmdline_memory_size <<= 10; - commands++; - } else if (*commands=='M' || *commands=='m') { - cmdline_memory_size <<= 20; - commands++; - } - } + if (!strncmp(commands, "mem=", 4)) + cmdline_memory_size = memparse(commands + 4, &commands); + while (*commands && *commands != ' ') commands++; } @@ -500,12 +488,16 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= HWCAP_SPARC_BLKINIT; if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= HWCAP_SPARC_N2; } @@ -533,6 +525,8 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 | AV_SPARC_ASI_BLK_INIT | @@ -540,6 +534,8 @@ static void __init init_sparc64_elf_hwcap(void) if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC | AV_SPARC_FMAF); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index f7ba87543e5f..302c476413d5 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1138,7 +1138,7 @@ static unsigned long penguins_are_doing_time; void smp_capture(void) { - int result = atomic_add_ret(1, &smp_capture_depth); + int result = atomic_add_return(1, &smp_capture_depth); if (result == 1) { int ncpus = num_online_cpus(); @@ -1467,6 +1467,13 @@ static void __init pcpu_populate_pte(unsigned long addr) pud_t *pud; pmd_t *pmd; + if (pgd_none(*pgd)) { + pud_t *new; + + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + pgd_populate(&init_mm, pgd, new); + } + pud = pud_offset(pgd, addr); if (pud_none(*pud)) { pmd_t *new; diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S index e0c09bf85610..6179e19bc9b9 100644 --- a/arch/sparc/kernel/sun4v_tlb_miss.S +++ b/arch/sparc/kernel/sun4v_tlb_miss.S @@ -195,6 +195,11 @@ sun4v_tsb_miss_common: ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7 sun4v_itlb_error: + rdpr %tl, %g1 + cmp %g1, 1 + ble,pt %icc, sun4v_bad_ra + or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_ITLB, %g1 + sethi %hi(sun4v_err_itlb_vaddr), %g1 stx %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)] sethi %hi(sun4v_err_itlb_ctx), %g1 @@ -206,15 +211,10 @@ sun4v_itlb_error: sethi %hi(sun4v_err_itlb_error), %g1 stx %o0, [%g1 + %lo(sun4v_err_itlb_error)] + sethi %hi(1f), %g7 rdpr %tl, %g4 - cmp %g4, 1 - ble,pt %icc, 1f - sethi %hi(2f), %g7 ba,pt %xcc, etraptl1 - or %g7, %lo(2f), %g7 - -1: ba,pt %xcc, etrap -2: or %g7, %lo(2b), %g7 +1: or %g7, %lo(1f), %g7 mov %l4, %o1 call sun4v_itlb_error_report add %sp, PTREGS_OFF, %o0 @@ -222,6 +222,11 @@ sun4v_itlb_error: /* NOTREACHED */ sun4v_dtlb_error: + rdpr %tl, %g1 + cmp %g1, 1 + ble,pt %icc, sun4v_bad_ra + or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_DTLB, %g1 + sethi %hi(sun4v_err_dtlb_vaddr), %g1 stx %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)] sethi %hi(sun4v_err_dtlb_ctx), %g1 @@ -233,21 +238,23 @@ sun4v_dtlb_error: sethi %hi(sun4v_err_dtlb_error), %g1 stx %o0, [%g1 + %lo(sun4v_err_dtlb_error)] + sethi %hi(1f), %g7 rdpr %tl, %g4 - cmp %g4, 1 - ble,pt %icc, 1f - sethi %hi(2f), %g7 ba,pt %xcc, etraptl1 - or %g7, %lo(2f), %g7 - -1: ba,pt %xcc, etrap -2: or %g7, %lo(2b), %g7 +1: or %g7, %lo(1f), %g7 mov %l4, %o1 call sun4v_dtlb_error_report add %sp, PTREGS_OFF, %o0 /* NOTREACHED */ +sun4v_bad_ra: + or %g0, %g4, %g5 + ba,pt %xcc, sparc64_realfault_common + or %g1, %g0, %g4 + + /* NOTREACHED */ + /* Instruction Access Exception, tl0. */ sun4v_iacc: ldxa [%g0] ASI_SCRATCHPAD, %g2 diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index fb6640ec8557..981a769b9558 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2104,6 +2104,11 @@ void sun4v_nonresum_overflow(struct pt_regs *regs) atomic_inc(&sun4v_nonresum_oflow_cnt); } +static void sun4v_tlb_error(struct pt_regs *regs) +{ + die_if_kernel("TLB/TSB error", regs); +} + unsigned long sun4v_err_itlb_vaddr; unsigned long sun4v_err_itlb_ctx; unsigned long sun4v_err_itlb_pte; @@ -2111,8 +2116,7 @@ unsigned long sun4v_err_itlb_error; void sun4v_itlb_error_report(struct pt_regs *regs, int tl) { - if (tl > 1) - dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n", regs->tpc, tl); @@ -2125,7 +2129,7 @@ void sun4v_itlb_error_report(struct pt_regs *regs, int tl) sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx, sun4v_err_itlb_pte, sun4v_err_itlb_error); - prom_halt(); + sun4v_tlb_error(regs); } unsigned long sun4v_err_dtlb_vaddr; @@ -2135,8 +2139,7 @@ unsigned long sun4v_err_dtlb_error; void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) { - if (tl > 1) - dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n", regs->tpc, tl); @@ -2149,7 +2152,7 @@ void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx, sun4v_err_dtlb_pte, sun4v_err_dtlb_error); - prom_halt(); + sun4v_tlb_error(regs); } void hypervisor_tlbop_error(unsigned long err, unsigned long op) diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c index 8647fcc5ca6c..cb5789c9f961 100644 --- a/arch/sparc/kernel/vio.c +++ b/arch/sparc/kernel/vio.c @@ -180,8 +180,10 @@ static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp, vdev->tx_irq = sun4v_build_virq(cdev_cfg_handle, *irq); irq = mdesc_get_property(hp, target, "rx-ino", NULL); - if (irq) + if (irq) { vdev->rx_irq = sun4v_build_virq(cdev_cfg_handle, *irq); + vdev->rx_ino = *irq; + } chan_id = mdesc_get_property(hp, target, "id", NULL); if (chan_id) @@ -189,6 +191,15 @@ static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp, } } +int vio_set_intr(unsigned long dev_ino, int state) +{ + int err; + + err = sun4v_vintr_set_valid(cdev_cfg_handle, dev_ino, state); + return err; +} +EXPORT_SYMBOL(vio_set_intr); + static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, struct device *parent) { diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index 7ef081a185b1..526fcb5d8ce9 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -724,7 +724,7 @@ int vio_ldc_alloc(struct vio_driver_state *vio, cfg.tx_irq = vio->vdev->tx_irq; cfg.rx_irq = vio->vdev->rx_irq; - lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg); + lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg, vio->name); if (IS_ERR(lp)) return PTR_ERR(lp); @@ -756,7 +756,7 @@ void vio_port_up(struct vio_driver_state *vio) err = 0; if (state == LDC_STATE_INIT) { - err = ldc_bind(vio->lp, vio->name); + err = ldc_bind(vio->lp); if (err) printk(KERN_WARNING "%s: Port %lu bind failed, " "err=%d\n", diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 932ff90fd760..09243057cb0b 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -35,8 +35,9 @@ jiffies = jiffies_64; SECTIONS { - /* swapper_low_pmd_dir is sparc64 only */ - swapper_low_pmd_dir = 0x0000000000402000; +#ifdef CONFIG_SPARC64 + swapper_pg_dir = 0x0000000000402000; +#endif . = INITIAL_ADDRESS; .text TEXTSTART : { @@ -122,11 +123,6 @@ SECTIONS *(.swapper_4m_tsb_phys_patch) __swapper_4m_tsb_phys_patch_end = .; } - .page_offset_shift_patch : { - __page_offset_shift_patch = .; - *(.page_offset_shift_patch) - __page_offset_shift_patch_end = .; - } .popc_3insn_patch : { __popc_3insn_patch = .; *(.popc_3insn_patch) diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index 1d32b54089aa..a7c418ac26af 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -27,18 +27,23 @@ static DEFINE_SPINLOCK(dummy); #endif /* SMP */ -int __atomic_add_return(int i, atomic_t *v) -{ - int ret; - unsigned long flags; - spin_lock_irqsave(ATOMIC_HASH(v), flags); - - ret = (v->counter += i); - - spin_unlock_irqrestore(ATOMIC_HASH(v), flags); - return ret; -} -EXPORT_SYMBOL(__atomic_add_return); +#define ATOMIC_OP(op, cop) \ +int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + int ret; \ + unsigned long flags; \ + spin_lock_irqsave(ATOMIC_HASH(v), flags); \ + \ + ret = (v->counter cop i); \ + \ + spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \ + return ret; \ +} \ +EXPORT_SYMBOL(atomic_##op##_return); + +ATOMIC_OP(add, +=) + +#undef ATOMIC_OP int atomic_cmpxchg(atomic_t *v, int old, int new) { diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S index 85c233d0a340..05dac43907d1 100644 --- a/arch/sparc/lib/atomic_64.S +++ b/arch/sparc/lib/atomic_64.S @@ -14,109 +14,80 @@ * memory barriers, and a second which returns * a value and does the barriers. */ -ENTRY(atomic_add) /* %o0 = increment, %o1 = atomic_ptr */ - BACKOFF_SETUP(%o2) -1: lduw [%o1], %g1 - add %g1, %o0, %g7 - cas [%o1], %g1, %g7 - cmp %g1, %g7 - bne,pn %icc, BACKOFF_LABEL(2f, 1b) - nop - retl - nop -2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic_add) -ENTRY(atomic_sub) /* %o0 = decrement, %o1 = atomic_ptr */ - BACKOFF_SETUP(%o2) -1: lduw [%o1], %g1 - sub %g1, %o0, %g7 - cas [%o1], %g1, %g7 - cmp %g1, %g7 - bne,pn %icc, BACKOFF_LABEL(2f, 1b) - nop - retl - nop -2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic_sub) +#define ATOMIC_OP(op) \ +ENTRY(atomic_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: lduw [%o1], %g1; \ + op %g1, %o0, %g7; \ + cas [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %icc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + nop; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(atomic_##op); \ -ENTRY(atomic_add_ret) /* %o0 = increment, %o1 = atomic_ptr */ - BACKOFF_SETUP(%o2) -1: lduw [%o1], %g1 - add %g1, %o0, %g7 - cas [%o1], %g1, %g7 - cmp %g1, %g7 - bne,pn %icc, BACKOFF_LABEL(2f, 1b) - add %g1, %o0, %g1 - retl - sra %g1, 0, %o0 -2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic_add_ret) +#define ATOMIC_OP_RETURN(op) \ +ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: lduw [%o1], %g1; \ + op %g1, %o0, %g7; \ + cas [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %icc, BACKOFF_LABEL(2f, 1b); \ + op %g1, %o0, %g1; \ + retl; \ + sra %g1, 0, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(atomic_##op##_return); -ENTRY(atomic_sub_ret) /* %o0 = decrement, %o1 = atomic_ptr */ - BACKOFF_SETUP(%o2) -1: lduw [%o1], %g1 - sub %g1, %o0, %g7 - cas [%o1], %g1, %g7 - cmp %g1, %g7 - bne,pn %icc, BACKOFF_LABEL(2f, 1b) - sub %g1, %o0, %g1 - retl - sra %g1, 0, %o0 -2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic_sub_ret) +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) -ENTRY(atomic64_add) /* %o0 = increment, %o1 = atomic_ptr */ - BACKOFF_SETUP(%o2) -1: ldx [%o1], %g1 - add %g1, %o0, %g7 - casx [%o1], %g1, %g7 - cmp %g1, %g7 - bne,pn %xcc, BACKOFF_LABEL(2f, 1b) - nop - retl - nop -2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic64_add) +ATOMIC_OPS(add) +ATOMIC_OPS(sub) -ENTRY(atomic64_sub) /* %o0 = decrement, %o1 = atomic_ptr */ - BACKOFF_SETUP(%o2) -1: ldx [%o1], %g1 - sub %g1, %o0, %g7 - casx [%o1], %g1, %g7 - cmp %g1, %g7 - bne,pn %xcc, BACKOFF_LABEL(2f, 1b) - nop - retl - nop -2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic64_sub) +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP -ENTRY(atomic64_add_ret) /* %o0 = increment, %o1 = atomic_ptr */ - BACKOFF_SETUP(%o2) -1: ldx [%o1], %g1 - add %g1, %o0, %g7 - casx [%o1], %g1, %g7 - cmp %g1, %g7 - bne,pn %xcc, BACKOFF_LABEL(2f, 1b) - nop - retl - add %g1, %o0, %o0 -2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic64_add_ret) +#define ATOMIC64_OP(op) \ +ENTRY(atomic64_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: ldx [%o1], %g1; \ + op %g1, %o0, %g7; \ + casx [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %xcc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + nop; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(atomic64_##op); \ -ENTRY(atomic64_sub_ret) /* %o0 = decrement, %o1 = atomic_ptr */ - BACKOFF_SETUP(%o2) -1: ldx [%o1], %g1 - sub %g1, %o0, %g7 - casx [%o1], %g1, %g7 - cmp %g1, %g7 - bne,pn %xcc, BACKOFF_LABEL(2f, 1b) - nop - retl - sub %g1, %o0, %o0 -2: BACKOFF_SPIN(%o2, %o3, 1b) -ENDPROC(atomic64_sub_ret) +#define ATOMIC64_OP_RETURN(op) \ +ENTRY(atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: ldx [%o1], %g1; \ + op %g1, %o0, %g7; \ + casx [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %xcc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + op %g1, %o0, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(atomic64_##op##_return); + +#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) + +ATOMIC64_OPS(add) +ATOMIC64_OPS(sub) + +#undef ATOMIC64_OPS +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP ENTRY(atomic64_dec_if_positive) /* %o0 = atomic_ptr */ BACKOFF_SETUP(%o2) diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 323335b9cd2b..1d649a95660c 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -99,14 +99,23 @@ EXPORT_SYMBOL(___copy_in_user); EXPORT_SYMBOL(__clear_user); /* Atomic counter implementation. */ -EXPORT_SYMBOL(atomic_add); -EXPORT_SYMBOL(atomic_add_ret); -EXPORT_SYMBOL(atomic_sub); -EXPORT_SYMBOL(atomic_sub_ret); -EXPORT_SYMBOL(atomic64_add); -EXPORT_SYMBOL(atomic64_add_ret); -EXPORT_SYMBOL(atomic64_sub); -EXPORT_SYMBOL(atomic64_sub_ret); +#define ATOMIC_OP(op) \ +EXPORT_SYMBOL(atomic_##op); \ +EXPORT_SYMBOL(atomic64_##op); + +#define ATOMIC_OP_RETURN(op) \ +EXPORT_SYMBOL(atomic_##op##_return); \ +EXPORT_SYMBOL(atomic64_##op##_return); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) + +ATOMIC_OPS(add) +ATOMIC_OPS(sub) + +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + EXPORT_SYMBOL(atomic64_dec_if_positive); /* Atomic bit operations. */ diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S index 99c017be8719..f75e6906df14 100644 --- a/arch/sparc/lib/memset.S +++ b/arch/sparc/lib/memset.S @@ -3,8 +3,9 @@ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) * - * Returns 0, if ok, and number of bytes not yet set if exception - * occurs and we were called as clear_user. + * Calls to memset returns initial %o0. Calls to bzero returns 0, if ok, and + * number of bytes not yet set if exception occurs and we were called as + * clear_user. */ #include <asm/ptrace.h> @@ -65,6 +66,8 @@ __bzero_begin: .globl __memset_start, __memset_end __memset_start: memset: + mov %o0, %g1 + mov 1, %g4 and %o1, 0xff, %g3 sll %g3, 8, %g2 or %g3, %g2, %g3 @@ -89,6 +92,7 @@ memset: sub %o0, %o2, %o0 __bzero: + clr %g4 mov %g0, %g3 1: cmp %o1, 7 @@ -151,8 +155,8 @@ __bzero: bne,a 8f EX(stb %g3, [%o0], and %o1, 1) 8: - retl - clr %o0 + b 0f + nop 7: be 13b orcc %o1, 0, %g0 @@ -164,6 +168,12 @@ __bzero: bne 8b EX(stb %g3, [%o0 - 1], add %o1, 1) 0: + andcc %g4, 1, %g0 + be 5f + nop + retl + mov %g1, %o0 +5: retl clr %o0 __memset_end: diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 587cd0565128..18fcd7167095 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -346,6 +346,9 @@ retry: down_read(&mm->mmap_sem); } + if (fault_code & FAULT_CODE_BAD_RA) + goto do_sigbus; + vma = find_vma(mm, address); if (!vma) goto bad_area; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 98ac8e80adae..2d91c62f7f5f 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -75,7 +75,6 @@ unsigned long kern_linear_pte_xor[4] __read_mostly; * 'cpu' properties, but we need to have this table setup before the * MDESC is initialized. */ -unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; #ifndef CONFIG_DEBUG_PAGEALLOC /* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings. @@ -84,10 +83,11 @@ unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; */ extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; #endif +extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; static unsigned long cpu_pgsz_mask; -#define MAX_BANKS 32 +#define MAX_BANKS 1024 static struct linux_prom64_registers pavail[MAX_BANKS]; static int pavail_ents; @@ -165,10 +165,6 @@ static void __init read_obp_memory(const char *property, cmp_p64, NULL); } -unsigned long sparc64_valid_addr_bitmap[VALID_ADDR_BITMAP_BYTES / - sizeof(unsigned long)]; -EXPORT_SYMBOL(sparc64_valid_addr_bitmap); - /* Kernel physical address base and size in bytes. */ unsigned long kern_base __read_mostly; unsigned long kern_size __read_mostly; @@ -840,7 +836,10 @@ static int find_node(unsigned long addr) if ((addr & p->mask) == p->val) return i; } - return -1; + /* The following condition has been observed on LDOM guests.*/ + WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node" + " rule. Some physical memory will be owned by node 0."); + return 0; } static u64 memblock_nid_range(u64 start, u64 end, int *nid) @@ -1366,9 +1365,144 @@ static unsigned long __init bootmem_init(unsigned long phys_base) static struct linux_prom64_registers pall[MAX_BANKS] __initdata; static int pall_ents __initdata; -#ifdef CONFIG_DEBUG_PAGEALLOC +static unsigned long max_phys_bits = 40; + +bool kern_addr_valid(unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if ((long)addr < 0L) { + unsigned long pa = __pa(addr); + + if ((addr >> max_phys_bits) != 0UL) + return false; + + return pfn_valid(pa >> PAGE_SHIFT); + } + + if (addr >= (unsigned long) KERNBASE && + addr < (unsigned long)&_end) + return true; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return 0; + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return 0; + + if (pud_large(*pud)) + return pfn_valid(pud_pfn(*pud)); + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return 0; + + if (pmd_large(*pmd)) + return pfn_valid(pmd_pfn(*pmd)); + + pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) + return 0; + + return pfn_valid(pte_pfn(*pte)); +} +EXPORT_SYMBOL(kern_addr_valid); + +static unsigned long __ref kernel_map_hugepud(unsigned long vstart, + unsigned long vend, + pud_t *pud) +{ + const unsigned long mask16gb = (1UL << 34) - 1UL; + u64 pte_val = vstart; + + /* Each PUD is 8GB */ + if ((vstart & mask16gb) || + (vend - vstart <= mask16gb)) { + pte_val ^= kern_linear_pte_xor[2]; + pud_val(*pud) = pte_val | _PAGE_PUD_HUGE; + + return vstart + PUD_SIZE; + } + + pte_val ^= kern_linear_pte_xor[3]; + pte_val |= _PAGE_PUD_HUGE; + + vend = vstart + mask16gb + 1UL; + while (vstart < vend) { + pud_val(*pud) = pte_val; + + pte_val += PUD_SIZE; + vstart += PUD_SIZE; + pud++; + } + return vstart; +} + +static bool kernel_can_map_hugepud(unsigned long vstart, unsigned long vend, + bool guard) +{ + if (guard && !(vstart & ~PUD_MASK) && (vend - vstart) >= PUD_SIZE) + return true; + + return false; +} + +static unsigned long __ref kernel_map_hugepmd(unsigned long vstart, + unsigned long vend, + pmd_t *pmd) +{ + const unsigned long mask256mb = (1UL << 28) - 1UL; + const unsigned long mask2gb = (1UL << 31) - 1UL; + u64 pte_val = vstart; + + /* Each PMD is 8MB */ + if ((vstart & mask256mb) || + (vend - vstart <= mask256mb)) { + pte_val ^= kern_linear_pte_xor[0]; + pmd_val(*pmd) = pte_val | _PAGE_PMD_HUGE; + + return vstart + PMD_SIZE; + } + + if ((vstart & mask2gb) || + (vend - vstart <= mask2gb)) { + pte_val ^= kern_linear_pte_xor[1]; + pte_val |= _PAGE_PMD_HUGE; + vend = vstart + mask256mb + 1UL; + } else { + pte_val ^= kern_linear_pte_xor[2]; + pte_val |= _PAGE_PMD_HUGE; + vend = vstart + mask2gb + 1UL; + } + + while (vstart < vend) { + pmd_val(*pmd) = pte_val; + + pte_val += PMD_SIZE; + vstart += PMD_SIZE; + pmd++; + } + + return vstart; +} + +static bool kernel_can_map_hugepmd(unsigned long vstart, unsigned long vend, + bool guard) +{ + if (guard && !(vstart & ~PMD_MASK) && (vend - vstart) >= PMD_SIZE) + return true; + + return false; +} + static unsigned long __ref kernel_map_range(unsigned long pstart, - unsigned long pend, pgprot_t prot) + unsigned long pend, pgprot_t prot, + bool use_huge) { unsigned long vstart = PAGE_OFFSET + pstart; unsigned long vend = PAGE_OFFSET + pend; @@ -1387,19 +1521,34 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, pmd_t *pmd; pte_t *pte; + if (pgd_none(*pgd)) { + pud_t *new; + + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + alloc_bytes += PAGE_SIZE; + pgd_populate(&init_mm, pgd, new); + } pud = pud_offset(pgd, vstart); if (pud_none(*pud)) { pmd_t *new; + if (kernel_can_map_hugepud(vstart, vend, use_huge)) { + vstart = kernel_map_hugepud(vstart, vend, pud); + continue; + } new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); alloc_bytes += PAGE_SIZE; pud_populate(&init_mm, pud, new); } pmd = pmd_offset(pud, vstart); - if (!pmd_present(*pmd)) { + if (pmd_none(*pmd)) { pte_t *new; + if (kernel_can_map_hugepmd(vstart, vend, use_huge)) { + vstart = kernel_map_hugepmd(vstart, vend, pmd); + continue; + } new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); alloc_bytes += PAGE_SIZE; pmd_populate_kernel(&init_mm, pmd, new); @@ -1422,100 +1571,34 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, return alloc_bytes; } -extern unsigned int kvmap_linear_patch[1]; -#endif /* CONFIG_DEBUG_PAGEALLOC */ - -static void __init kpte_set_val(unsigned long index, unsigned long val) -{ - unsigned long *ptr = kpte_linear_bitmap; - - val <<= ((index % (BITS_PER_LONG / 2)) * 2); - ptr += (index / (BITS_PER_LONG / 2)); - - *ptr |= val; -} - -static const unsigned long kpte_shift_min = 28; /* 256MB */ -static const unsigned long kpte_shift_max = 34; /* 16GB */ -static const unsigned long kpte_shift_incr = 3; - -static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end, - unsigned long shift) +static void __init flush_all_kernel_tsbs(void) { - unsigned long size = (1UL << shift); - unsigned long mask = (size - 1UL); - unsigned long remains = end - start; - unsigned long val; - - if (remains < size || (start & mask)) - return start; - - /* VAL maps: - * - * shift 28 --> kern_linear_pte_xor index 1 - * shift 31 --> kern_linear_pte_xor index 2 - * shift 34 --> kern_linear_pte_xor index 3 - */ - val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1; - - remains &= ~mask; - if (shift != kpte_shift_max) - remains = size; - - while (remains) { - unsigned long index = start >> kpte_shift_min; + int i; - kpte_set_val(index, val); + for (i = 0; i < KERNEL_TSB_NENTRIES; i++) { + struct tsb *ent = &swapper_tsb[i]; - start += 1UL << kpte_shift_min; - remains -= 1UL << kpte_shift_min; + ent->tag = (1UL << TSB_TAG_INVALID_BIT); } +#ifndef CONFIG_DEBUG_PAGEALLOC + for (i = 0; i < KERNEL_TSB4M_NENTRIES; i++) { + struct tsb *ent = &swapper_4m_tsb[i]; - return start; -} - -static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) -{ - unsigned long smallest_size, smallest_mask; - unsigned long s; - - smallest_size = (1UL << kpte_shift_min); - smallest_mask = (smallest_size - 1UL); - - while (start < end) { - unsigned long orig_start = start; - - for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) { - start = kpte_mark_using_shift(start, end, s); - - if (start != orig_start) - break; - } - - if (start == orig_start) - start = (start + smallest_size) & ~smallest_mask; + ent->tag = (1UL << TSB_TAG_INVALID_BIT); } +#endif } -static void __init init_kpte_bitmap(void) -{ - unsigned long i; - - for (i = 0; i < pall_ents; i++) { - unsigned long phys_start, phys_end; - - phys_start = pall[i].phys_addr; - phys_end = phys_start + pall[i].reg_size; - - mark_kpte_bitmap(phys_start, phys_end); - } -} +extern unsigned int kvmap_linear_patch[1]; static void __init kernel_physical_mapping_init(void) { -#ifdef CONFIG_DEBUG_PAGEALLOC unsigned long i, mem_alloced = 0UL; + bool use_huge = true; +#ifdef CONFIG_DEBUG_PAGEALLOC + use_huge = false; +#endif for (i = 0; i < pall_ents; i++) { unsigned long phys_start, phys_end; @@ -1523,7 +1606,7 @@ static void __init kernel_physical_mapping_init(void) phys_end = phys_start + pall[i].reg_size; mem_alloced += kernel_map_range(phys_start, phys_end, - PAGE_KERNEL); + PAGE_KERNEL, use_huge); } printk("Allocated %ld bytes for kernel page tables.\n", @@ -1532,8 +1615,9 @@ static void __init kernel_physical_mapping_init(void) kvmap_linear_patch[0] = 0x01000000; /* nop */ flushi(&kvmap_linear_patch[0]); + flush_all_kernel_tsbs(); + __flush_tlb_all(); -#endif } #ifdef CONFIG_DEBUG_PAGEALLOC @@ -1543,7 +1627,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) unsigned long phys_end = phys_start + (numpages * PAGE_SIZE); kernel_map_range(phys_start, phys_end, - (enable ? PAGE_KERNEL : __pgprot(0))); + (enable ? PAGE_KERNEL : __pgprot(0)), false); flush_tsb_kernel_range(PAGE_OFFSET + phys_start, PAGE_OFFSET + phys_end); @@ -1571,76 +1655,56 @@ unsigned long __init find_ecache_flush_span(unsigned long size) unsigned long PAGE_OFFSET; EXPORT_SYMBOL(PAGE_OFFSET); -static void __init page_offset_shift_patch_one(unsigned int *insn, unsigned long phys_bits) -{ - unsigned long final_shift; - unsigned int val = *insn; - unsigned int cnt; - - /* We are patching in ilog2(max_supported_phys_address), and - * we are doing so in a manner similar to a relocation addend. - * That is, we are adding the shift value to whatever value - * is in the shift instruction count field already. - */ - cnt = (val & 0x3f); - val &= ~0x3f; - - /* If we are trying to shift >= 64 bits, clear the destination - * register. This can happen when phys_bits ends up being equal - * to MAX_PHYS_ADDRESS_BITS. - */ - final_shift = (cnt + (64 - phys_bits)); - if (final_shift >= 64) { - unsigned int rd = (val >> 25) & 0x1f; - - val = 0x80100000 | (rd << 25); - } else { - val |= final_shift; - } - *insn = val; - - __asm__ __volatile__("flush %0" - : /* no outputs */ - : "r" (insn)); -} - -static void __init page_offset_shift_patch(unsigned long phys_bits) -{ - extern unsigned int __page_offset_shift_patch; - extern unsigned int __page_offset_shift_patch_end; - unsigned int *p; - - p = &__page_offset_shift_patch; - while (p < &__page_offset_shift_patch_end) { - unsigned int *insn = (unsigned int *)(unsigned long)*p; +unsigned long VMALLOC_END = 0x0000010000000000UL; +EXPORT_SYMBOL(VMALLOC_END); - page_offset_shift_patch_one(insn, phys_bits); - - p++; - } -} +unsigned long sparc64_va_hole_top = 0xfffff80000000000UL; +unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL; static void __init setup_page_offset(void) { - unsigned long max_phys_bits = 40; - if (tlb_type == cheetah || tlb_type == cheetah_plus) { + /* Cheetah/Panther support a full 64-bit virtual + * address, so we can use all that our page tables + * support. + */ + sparc64_va_hole_top = 0xfff0000000000000UL; + sparc64_va_hole_bottom = 0x0010000000000000UL; + max_phys_bits = 42; } else if (tlb_type == hypervisor) { switch (sun4v_chip_type) { case SUN4V_CHIP_NIAGARA1: case SUN4V_CHIP_NIAGARA2: + /* T1 and T2 support 48-bit virtual addresses. */ + sparc64_va_hole_top = 0xffff800000000000UL; + sparc64_va_hole_bottom = 0x0000800000000000UL; + max_phys_bits = 39; break; case SUN4V_CHIP_NIAGARA3: + /* T3 supports 48-bit virtual addresses. */ + sparc64_va_hole_top = 0xffff800000000000UL; + sparc64_va_hole_bottom = 0x0000800000000000UL; + max_phys_bits = 43; break; case SUN4V_CHIP_NIAGARA4: case SUN4V_CHIP_NIAGARA5: case SUN4V_CHIP_SPARC64X: - default: + case SUN4V_CHIP_SPARC_M6: + /* T4 and later support 52-bit virtual addresses. */ + sparc64_va_hole_top = 0xfff8000000000000UL; + sparc64_va_hole_bottom = 0x0008000000000000UL; max_phys_bits = 47; break; + case SUN4V_CHIP_SPARC_M7: + default: + /* M7 and later support 52-bit virtual addresses. */ + sparc64_va_hole_top = 0xfff8000000000000UL; + sparc64_va_hole_bottom = 0x0008000000000000UL; + max_phys_bits = 49; + break; } } @@ -1650,12 +1714,16 @@ static void __init setup_page_offset(void) prom_halt(); } - PAGE_OFFSET = PAGE_OFFSET_BY_BITS(max_phys_bits); + PAGE_OFFSET = sparc64_va_hole_top; + VMALLOC_END = ((sparc64_va_hole_bottom >> 1) + + (sparc64_va_hole_bottom >> 2)); - pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n", + pr_info("MM: PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n", PAGE_OFFSET, max_phys_bits); - - page_offset_shift_patch(max_phys_bits); + pr_info("MM: VMALLOC [0x%016lx --> 0x%016lx]\n", + VMALLOC_START, VMALLOC_END); + pr_info("MM: VMEMMAP [0x%016lx --> 0x%016lx]\n", + VMEMMAP_BASE, VMEMMAP_BASE << 1); } static void __init tsb_phys_patch(void) @@ -1700,21 +1768,42 @@ static void __init tsb_phys_patch(void) #define NUM_KTSB_DESCR 1 #endif static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR]; -extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; + +/* The swapper TSBs are loaded with a base sequence of: + * + * sethi %uhi(SYMBOL), REG1 + * sethi %hi(SYMBOL), REG2 + * or REG1, %ulo(SYMBOL), REG1 + * or REG2, %lo(SYMBOL), REG2 + * sllx REG1, 32, REG1 + * or REG1, REG2, REG1 + * + * When we use physical addressing for the TSB accesses, we patch the + * first four instructions in the above sequence. + */ static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa) { - pa >>= KTSB_PHYS_SHIFT; + unsigned long high_bits, low_bits; + + high_bits = (pa >> 32) & 0xffffffff; + low_bits = (pa >> 0) & 0xffffffff; while (start < end) { unsigned int *ia = (unsigned int *)(unsigned long)*start; - ia[0] = (ia[0] & ~0x3fffff) | (pa >> 10); + ia[0] = (ia[0] & ~0x3fffff) | (high_bits >> 10); __asm__ __volatile__("flush %0" : : "r" (ia)); - ia[1] = (ia[1] & ~0x3ff) | (pa & 0x3ff); + ia[1] = (ia[1] & ~0x3fffff) | (low_bits >> 10); __asm__ __volatile__("flush %0" : : "r" (ia + 1)); + ia[2] = (ia[2] & ~0x1fff) | (high_bits & 0x3ff); + __asm__ __volatile__("flush %0" : : "r" (ia + 2)); + + ia[3] = (ia[3] & ~0x1fff) | (low_bits & 0x3ff); + __asm__ __volatile__("flush %0" : : "r" (ia + 3)); + start++; } } @@ -1853,11 +1942,56 @@ static void __init sun4v_linear_pte_xor_finalize(void) /* paging_init() sets up the page tables */ static unsigned long last_valid_pfn; -pgd_t swapper_pg_dir[PTRS_PER_PGD]; static void sun4u_pgprot_init(void); static void sun4v_pgprot_init(void); +static phys_addr_t __init available_memory(void) +{ + phys_addr_t available = 0ULL; + phys_addr_t pa_start, pa_end; + u64 i; + + for_each_free_mem_range(i, NUMA_NO_NODE, &pa_start, &pa_end, NULL) + available = available + (pa_end - pa_start); + + return available; +} + +/* We need to exclude reserved regions. This exclusion will include + * vmlinux and initrd. To be more precise the initrd size could be used to + * compute a new lower limit because it is freed later during initialization. + */ +static void __init reduce_memory(phys_addr_t limit_ram) +{ + phys_addr_t avail_ram = available_memory(); + phys_addr_t pa_start, pa_end; + u64 i; + + if (limit_ram >= avail_ram) + return; + + for_each_free_mem_range(i, NUMA_NO_NODE, &pa_start, &pa_end, NULL) { + phys_addr_t region_size = pa_end - pa_start; + phys_addr_t clip_start = pa_start; + + avail_ram = avail_ram - region_size; + /* Are we consuming too much? */ + if (avail_ram < limit_ram) { + phys_addr_t give_back = limit_ram - avail_ram; + + region_size = region_size - give_back; + clip_start = clip_start + give_back; + } + + memblock_remove(clip_start, region_size); + + if (avail_ram <= limit_ram) + break; + i = 0UL; + } +} + void __init paging_init(void) { unsigned long end_pfn, shift, phys_base; @@ -1937,7 +2071,8 @@ void __init paging_init(void) find_ramdisk(phys_base); - memblock_enforce_memory_limit(cmdline_memory_size); + if (cmdline_memory_size) + reduce_memory(cmdline_memory_size); memblock_allow_resize(); memblock_dump_all(); @@ -1956,16 +2091,10 @@ void __init paging_init(void) */ init_mm.pgd += ((shift) / (sizeof(pgd_t))); - memset(swapper_low_pmd_dir, 0, sizeof(swapper_low_pmd_dir)); + memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir)); - /* Now can init the kernel/bad page tables. */ - pud_set(pud_offset(&swapper_pg_dir[0], 0), - swapper_low_pmd_dir + (shift / sizeof(pgd_t))); - inherit_prom_mappings(); - init_kpte_bitmap(); - /* Ok, we can use our TLB miss and window trap handlers safely. */ setup_tba(); @@ -2072,70 +2201,6 @@ int page_in_phys_avail(unsigned long paddr) return 0; } -static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata; -static int pavail_rescan_ents __initdata; - -/* Certain OBP calls, such as fetching "available" properties, can - * claim physical memory. So, along with initializing the valid - * address bitmap, what we do here is refetch the physical available - * memory list again, and make sure it provides at least as much - * memory as 'pavail' does. - */ -static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap) -{ - int i; - - read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents); - - for (i = 0; i < pavail_ents; i++) { - unsigned long old_start, old_end; - - old_start = pavail[i].phys_addr; - old_end = old_start + pavail[i].reg_size; - while (old_start < old_end) { - int n; - - for (n = 0; n < pavail_rescan_ents; n++) { - unsigned long new_start, new_end; - - new_start = pavail_rescan[n].phys_addr; - new_end = new_start + - pavail_rescan[n].reg_size; - - if (new_start <= old_start && - new_end >= (old_start + PAGE_SIZE)) { - set_bit(old_start >> ILOG2_4MB, bitmap); - goto do_next_page; - } - } - - prom_printf("mem_init: Lost memory in pavail\n"); - prom_printf("mem_init: OLD start[%lx] size[%lx]\n", - pavail[i].phys_addr, - pavail[i].reg_size); - prom_printf("mem_init: NEW start[%lx] size[%lx]\n", - pavail_rescan[i].phys_addr, - pavail_rescan[i].reg_size); - prom_printf("mem_init: Cannot continue, aborting.\n"); - prom_halt(); - - do_next_page: - old_start += PAGE_SIZE; - } - } -} - -static void __init patch_tlb_miss_handler_bitmap(void) -{ - extern unsigned int valid_addr_bitmap_insn[]; - extern unsigned int valid_addr_bitmap_patch[]; - - valid_addr_bitmap_insn[1] = valid_addr_bitmap_patch[1]; - mb(); - valid_addr_bitmap_insn[0] = valid_addr_bitmap_patch[0]; - flushi(&valid_addr_bitmap_insn[0]); -} - static void __init register_page_bootmem_info(void) { #ifdef CONFIG_NEED_MULTIPLE_NODES @@ -2148,18 +2213,6 @@ static void __init register_page_bootmem_info(void) } void __init mem_init(void) { - unsigned long addr, last; - - addr = PAGE_OFFSET + kern_base; - last = PAGE_ALIGN(kern_size) + addr; - while (addr < last) { - set_bit(__pa(addr) >> ILOG2_4MB, sparc64_valid_addr_bitmap); - addr += PAGE_SIZE; - } - - setup_valid_addr_bitmap_from_pavail(sparc64_valid_addr_bitmap); - patch_tlb_miss_handler_bitmap(); - high_memory = __va(last_valid_pfn << PAGE_SHIFT); register_page_bootmem_info(); @@ -2249,18 +2302,9 @@ unsigned long _PAGE_CACHE __read_mostly; EXPORT_SYMBOL(_PAGE_CACHE); #ifdef CONFIG_SPARSEMEM_VMEMMAP -unsigned long vmemmap_table[VMEMMAP_SIZE]; - -static long __meminitdata addr_start, addr_end; -static int __meminitdata node_start; - int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, int node) { - unsigned long phys_start = (vstart - VMEMMAP_BASE); - unsigned long phys_end = (vend - VMEMMAP_BASE); - unsigned long addr = phys_start & VMEMMAP_CHUNK_MASK; - unsigned long end = VMEMMAP_ALIGN(phys_end); unsigned long pte_base; pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U | @@ -2271,47 +2315,52 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); - for (; addr < end; addr += VMEMMAP_CHUNK) { - unsigned long *vmem_pp = - vmemmap_table + (addr >> VMEMMAP_CHUNK_SHIFT); - void *block; + pte_base |= _PAGE_PMD_HUGE; - if (!(*vmem_pp & _PAGE_VALID)) { - block = vmemmap_alloc_block(1UL << ILOG2_4MB, node); - if (!block) + vstart = vstart & PMD_MASK; + vend = ALIGN(vend, PMD_SIZE); + for (; vstart < vend; vstart += PMD_SIZE) { + pgd_t *pgd = pgd_offset_k(vstart); + unsigned long pte; + pud_t *pud; + pmd_t *pmd; + + if (pgd_none(*pgd)) { + pud_t *new = vmemmap_alloc_block(PAGE_SIZE, node); + + if (!new) return -ENOMEM; + pgd_populate(&init_mm, pgd, new); + } - *vmem_pp = pte_base | __pa(block); + pud = pud_offset(pgd, vstart); + if (pud_none(*pud)) { + pmd_t *new = vmemmap_alloc_block(PAGE_SIZE, node); - /* check to see if we have contiguous blocks */ - if (addr_end != addr || node_start != node) { - if (addr_start) - printk(KERN_DEBUG " [%lx-%lx] on node %d\n", - addr_start, addr_end-1, node_start); - addr_start = addr; - node_start = node; - } - addr_end = addr + VMEMMAP_CHUNK; + if (!new) + return -ENOMEM; + pud_populate(&init_mm, pud, new); } - } - return 0; -} -void __meminit vmemmap_populate_print_last(void) -{ - if (addr_start) { - printk(KERN_DEBUG " [%lx-%lx] on node %d\n", - addr_start, addr_end-1, node_start); - addr_start = 0; - addr_end = 0; - node_start = 0; + pmd = pmd_offset(pud, vstart); + + pte = pmd_val(*pmd); + if (!(pte & _PAGE_VALID)) { + void *block = vmemmap_alloc_block(PMD_SIZE, node); + + if (!block) + return -ENOMEM; + + pmd_val(*pmd) = pte_base | __pa(block); + } } + + return 0; } void vmemmap_free(unsigned long start, unsigned long end) { } - #endif /* CONFIG_SPARSEMEM_VMEMMAP */ static void prot_init_common(unsigned long page_none, @@ -2787,8 +2836,8 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) do_flush_tlb_kernel_range(start, LOW_OBP_ADDRESS); } if (end > HI_OBP_ADDRESS) { - flush_tsb_kernel_range(end, HI_OBP_ADDRESS); - do_flush_tlb_kernel_range(end, HI_OBP_ADDRESS); + flush_tsb_kernel_range(HI_OBP_ADDRESS, end); + do_flush_tlb_kernel_range(HI_OBP_ADDRESS, end); } } else { flush_tsb_kernel_range(start, end); diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h index 0668b364f44d..a4c09603b05c 100644 --- a/arch/sparc/mm/init_64.h +++ b/arch/sparc/mm/init_64.h @@ -8,15 +8,8 @@ */ #define MAX_PHYS_ADDRESS (1UL << MAX_PHYS_ADDRESS_BITS) -#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) -#define KPTE_BITMAP_BYTES \ - ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4) -#define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL) -#define VALID_ADDR_BITMAP_BYTES \ - ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8) extern unsigned long kern_linear_pte_xor[4]; -extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; extern unsigned int sparc64_highest_unlocked_tlb_ent; extern unsigned long sparc64_kern_pri_context; extern unsigned long sparc64_kern_pri_nuc_bits; @@ -38,15 +31,4 @@ extern unsigned long kern_locked_tte_data; void prom_world(int enter); -#ifdef CONFIG_SPARSEMEM_VMEMMAP -#define VMEMMAP_CHUNK_SHIFT 22 -#define VMEMMAP_CHUNK (1UL << VMEMMAP_CHUNK_SHIFT) -#define VMEMMAP_CHUNK_MASK ~(VMEMMAP_CHUNK - 1UL) -#define VMEMMAP_ALIGN(x) (((x)+VMEMMAP_CHUNK-1UL)&VMEMMAP_CHUNK_MASK) - -#define VMEMMAP_SIZE ((((1UL << MAX_PHYSADDR_BITS) >> PAGE_SHIFT) * \ - sizeof(struct page)) >> VMEMMAP_CHUNK_SHIFT) -extern unsigned long vmemmap_table[VMEMMAP_SIZE]; -#endif - #endif /* _SPARC64_MM_INIT_H */ diff --git a/arch/sparc/power/hibernate_asm.S b/arch/sparc/power/hibernate_asm.S index 79942166df84..d7d9017dcb15 100644 --- a/arch/sparc/power/hibernate_asm.S +++ b/arch/sparc/power/hibernate_asm.S @@ -54,8 +54,8 @@ ENTRY(swsusp_arch_resume) nop /* Write PAGE_OFFSET to %g7 */ - sethi %uhi(PAGE_OFFSET), %g7 - sllx %g7, 32, %g7 + sethi %hi(PAGE_OFFSET), %g7 + ldx [%g7 + %lo(PAGE_OFFSET)], %g7 setuw (PAGE_SIZE-8), %g3 diff --git a/arch/sparc/prom/bootstr_64.c b/arch/sparc/prom/bootstr_64.c index ab9ccc63b388..7149e77714a4 100644 --- a/arch/sparc/prom/bootstr_64.c +++ b/arch/sparc/prom/bootstr_64.c @@ -14,7 +14,10 @@ * the .bss section or it will break things. */ -#define BARG_LEN 256 +/* We limit BARG_LEN to 1024 because this is the size of the + * 'barg_out' command line buffer in the SILO bootloader. + */ +#define BARG_LEN 1024 struct { int bootstr_len; int bootstr_valid; diff --git a/arch/sparc/prom/p1275.c b/arch/sparc/prom/p1275.c index e58b81726319..b2340f008ae0 100644 --- a/arch/sparc/prom/p1275.c +++ b/arch/sparc/prom/p1275.c @@ -9,6 +9,7 @@ #include <linux/smp.h> #include <linux/string.h> #include <linux/spinlock.h> +#include <linux/irqflags.h> #include <asm/openprom.h> #include <asm/oplib.h> @@ -36,8 +37,8 @@ void p1275_cmd_direct(unsigned long *args) { unsigned long flags; - raw_local_save_flags(flags); - raw_local_irq_restore((unsigned long)PIL_NMI); + local_save_flags(flags); + local_irq_restore((unsigned long)PIL_NMI); raw_spin_lock(&prom_entry_lock); prom_world(1); @@ -45,7 +46,7 @@ void p1275_cmd_direct(unsigned long *args) prom_world(0); raw_spin_unlock(&prom_entry_lock); - raw_local_irq_restore(flags); + local_irq_restore(flags); } void prom_cif_init(void *cif_handler, void *cif_stack) diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index 9e3a72205827..dd16c902ff70 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -79,7 +79,6 @@ static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size, set_task_state(current, TASK_INTERRUPTIBLE); schedule(); - set_task_state(current, TASK_RUNNING); remove_wait_queue(&host_read_wait, &wait); if (atomic_dec_and_test(&host_sleep_count)) { diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore index 7cab8c08e6d1..aff152c87cf4 100644 --- a/arch/x86/.gitignore +++ b/arch/x86/.gitignore @@ -1,4 +1,6 @@ boot/compressed/vmlinux tools/test_get_len tools/insn_sanity +purgatory/kexec-purgatory.c +purgatory/purgatory.ro diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3eb8a41509b3..f2327e88e07c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -491,6 +491,36 @@ config X86_INTEL_LPSS things like clock tree (common clock framework) and pincontrol which are needed by the LPSS peripheral drivers. +config IOSF_MBI + tristate "Intel SoC IOSF Sideband support for SoC platforms" + depends on PCI + ---help--- + This option enables sideband register access support for Intel SoC + platforms. On these platforms the IOSF sideband is used in lieu of + MSR's for some register accesses, mostly but not limited to thermal + and power. Drivers may query the availability of this device to + determine if they need the sideband in order to work on these + platforms. The sideband is available on the following SoC products. + This list is not meant to be exclusive. + - BayTrail + - Braswell + - Quark + + You should say Y if you are running a kernel on one of these SoC's. + +config IOSF_MBI_DEBUG + bool "Enable IOSF sideband access through debugfs" + depends on IOSF_MBI && DEBUG_FS + ---help--- + Select this option to expose the IOSF sideband access registers (MCR, + MDR, MCRX) through debugfs to write and read register information from + different units on the SoC. This is most useful for obtaining device + state information for debug and analysis. As this is a general access + mechanism, users of this option would have specific knowledge of the + device they want to access. + + If you don't require the option or are in doubt, say N. + config X86_RDC321X bool "RDC R-321x SoC" depends on X86_32 @@ -2454,11 +2484,6 @@ config X86_DMA_REMAP bool depends on STA2X11 -config IOSF_MBI - tristate - default m - depends on PCI - config PMC_ATOM def_bool y depends on PCI diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 5692d6ac0f18..920e6160c535 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -50,9 +50,6 @@ ifeq ($(CONFIG_X86_32),y) KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return - # Don't autogenerate MMX or SSE instructions - KBUILD_CFLAGS += -mno-mmx -mno-sse - # Never want PIC in a 32-bit kernel, prevent breakage with GCC built # with nonstandard options KBUILD_CFLAGS += -fno-pic @@ -80,8 +77,7 @@ else KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 - # Don't autogenerate traditional x87, MMX or SSE instructions - KBUILD_CFLAGS += -mno-mmx -mno-sse + # Don't autogenerate traditional x87 instructions KBUILD_CFLAGS += $(call cc-option,-mno-80387) KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387) @@ -168,7 +164,7 @@ KBUILD_CFLAGS += -Wno-sign-compare # KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # prevent gcc from generating any FP code by mistake -KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) +KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow KBUILD_CFLAGS += $(call cc-option,-mno-avx,) KBUILD_CFLAGS += $(mflags-y) diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index 7c68808edeb7..bb1376381985 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -194,7 +194,7 @@ static bool mem_avoid_overlap(struct mem_vector *img) while (ptr) { struct mem_vector avoid; - avoid.start = (u64)ptr; + avoid.start = (unsigned long)ptr; avoid.size = sizeof(*ptr) + ptr->len; if (mem_overlaps(img, &avoid)) diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c index 4579eff0ef4d..637097e66a62 100644 --- a/arch/x86/boot/mkcpustr.c +++ b/arch/x86/boot/mkcpustr.c @@ -16,6 +16,7 @@ #include <stdio.h> #include "../include/asm/required-features.h" +#include "../include/asm/disabled-features.h" #include "../include/asm/cpufeature.h" #include "../kernel/cpu/capflags.c" diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index d21ff89207cd..df91466f973d 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -308,11 +308,8 @@ static int load_aout_binary(struct linux_binprm *bprm) (current->mm->start_brk = N_BSSADDR(ex)); retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); - if (retval < 0) { - /* Someone check-me: is this error path enough? */ - send_sig(SIGKILL, current, 0); + if (retval < 0) return retval; - } install_exec_creds(bprm); @@ -324,17 +321,13 @@ static int load_aout_binary(struct linux_binprm *bprm) error = vm_brk(text_addr & PAGE_MASK, map_size); - if (error != (text_addr & PAGE_MASK)) { - send_sig(SIGKILL, current, 0); + if (error != (text_addr & PAGE_MASK)) return error; - } error = read_code(bprm->file, text_addr, 32, ex.a_text + ex.a_data); - if ((signed long)error < 0) { - send_sig(SIGKILL, current, 0); + if ((signed long)error < 0) return error; - } } else { #ifdef WARN_OLD static unsigned long error_time, error_time2; @@ -368,20 +361,16 @@ static int load_aout_binary(struct linux_binprm *bprm) MAP_EXECUTABLE | MAP_32BIT, fd_offset); - if (error != N_TXTADDR(ex)) { - send_sig(SIGKILL, current, 0); + if (error != N_TXTADDR(ex)) return error; - } error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, fd_offset + ex.a_text); - if (error != N_DATADDR(ex)) { - send_sig(SIGKILL, current, 0); + if (error != N_DATADDR(ex)) return error; - } } beyond_if: set_binfmt(&aout_format); diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 4299eb05023c..711de084ab57 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -151,6 +151,16 @@ ENTRY(ia32_sysenter_target) 1: movl (%rbp),%ebp _ASM_EXTABLE(1b,ia32_badarg) ASM_CLAC + + /* + * Sysenter doesn't filter flags, so we need to clear NT + * ourselves. To save a few cycles, we can check whether + * NT was set instead of doing an unconditional popfq. + */ + testl $X86_EFLAGS_NT,EFLAGS(%rsp) /* saved EFLAGS match cpu */ + jnz sysenter_fix_flags +sysenter_flags_fixed: + orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) CFI_REMEMBER_STATE @@ -184,6 +194,8 @@ sysexit_from_sys_call: TRACE_IRQS_ON ENABLE_INTERRUPTS_SYSEXIT32 + CFI_RESTORE_STATE + #ifdef CONFIG_AUDITSYSCALL .macro auditsys_entry_common movl %esi,%r9d /* 6th arg: 4th syscall arg */ @@ -226,7 +238,6 @@ sysexit_from_sys_call: .endm sysenter_auditsys: - CFI_RESTORE_STATE auditsys_entry_common movl %ebp,%r9d /* reload 6th syscall arg */ jmp sysenter_dispatch @@ -235,6 +246,11 @@ sysexit_audit: auditsys_exit sysexit_from_sys_call #endif +sysenter_fix_flags: + pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) + popfq_cfi + jmp sysenter_flags_fixed + sysenter_tracesys: #ifdef CONFIG_AUDITSYSCALL testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 6dd1c7dd0473..5e5cd123fdfb 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -24,7 +24,7 @@ */ static inline int atomic_read(const atomic_t *v) { - return (*(volatile int *)&(v)->counter); + return ACCESS_ONCE((v)->counter); } /** @@ -219,21 +219,6 @@ static inline short int atomic_inc_short(short int *v) return *v; } -#ifdef CONFIG_X86_64 -/** - * atomic_or_long - OR of two long integers - * @v1: pointer to type unsigned long - * @v2: pointer to type unsigned long - * - * Atomically ORs @v1 and @v2 - * Returns the result of the OR - */ -static inline void atomic_or_long(unsigned long *v1, unsigned long v2) -{ - asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2)); -} -#endif - /* These are x86-specific, used by some header files */ #define atomic_clear_mask(mask, addr) \ asm volatile(LOCK_PREFIX "andl %0,%1" \ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 46e9052bbd28..f8d273e18516 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -18,7 +18,7 @@ */ static inline long atomic64_read(const atomic64_t *v) { - return (*(volatile long *)&(v)->counter); + return ACCESS_ONCE((v)->counter); } /** diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index cb4c73bfeb48..76659b67fd11 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -85,7 +85,7 @@ For 32-bit we have the following conventions - kernel is built with #define ARGOFFSET R11 #define SWFRAME ORIG_RAX - .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1 + .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0 subq $9*8+\addskip, %rsp CFI_ADJUST_CFA_OFFSET 9*8+\addskip movq_cfi rdi, 8*8 @@ -96,7 +96,11 @@ For 32-bit we have the following conventions - kernel is built with movq_cfi rcx, 5*8 .endif + .if \rax_enosys + movq $-ENOSYS, 4*8(%rsp) + .else movq_cfi rax, 4*8 + .endif .if \save_r891011 movq_cfi r8, 3*8 diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 094292a63e74..0bb1335313b2 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -8,6 +8,10 @@ #include <asm/required-features.h> #endif +#ifndef _ASM_X86_DISABLED_FEATURES_H +#include <asm/disabled-features.h> +#endif + #define NCAPINTS 11 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ @@ -282,6 +286,18 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \ (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) +#define DISABLED_MASK_BIT_SET(bit) \ + ( (((bit)>>5)==0 && (1UL<<((bit)&31) & DISABLED_MASK0)) || \ + (((bit)>>5)==1 && (1UL<<((bit)&31) & DISABLED_MASK1)) || \ + (((bit)>>5)==2 && (1UL<<((bit)&31) & DISABLED_MASK2)) || \ + (((bit)>>5)==3 && (1UL<<((bit)&31) & DISABLED_MASK3)) || \ + (((bit)>>5)==4 && (1UL<<((bit)&31) & DISABLED_MASK4)) || \ + (((bit)>>5)==5 && (1UL<<((bit)&31) & DISABLED_MASK5)) || \ + (((bit)>>5)==6 && (1UL<<((bit)&31) & DISABLED_MASK6)) || \ + (((bit)>>5)==7 && (1UL<<((bit)&31) & DISABLED_MASK7)) || \ + (((bit)>>5)==8 && (1UL<<((bit)&31) & DISABLED_MASK8)) || \ + (((bit)>>5)==9 && (1UL<<((bit)&31) & DISABLED_MASK9)) ) + #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ test_cpu_cap(c, bit)) @@ -290,6 +306,18 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability)) +/* + * This macro is for detection of features which need kernel + * infrastructure to be used. It may *not* directly test the CPU + * itself. Use the cpu_has() family if you want true runtime + * testing of CPU features, like in hypervisor code where you are + * supporting a possible guest feature where host support for it + * is not relevant. + */ +#define cpu_feature_enabled(bit) \ + (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : \ + cpu_has(&boot_cpu_data, bit)) + #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) @@ -304,11 +332,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; } while (0) #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) -#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME) #define cpu_has_de boot_cpu_has(X86_FEATURE_DE) #define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) #define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) -#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE) #define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE) #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) #define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP) @@ -324,9 +350,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2) #define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) #define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) -#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR) -#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR) -#define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR) #define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE) #define cpu_has_xstore_enabled boot_cpu_has(X86_FEATURE_XSTORE_EN) #define cpu_has_xcrypt boot_cpu_has(X86_FEATURE_XCRYPT) @@ -361,25 +384,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) #define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT) -#ifdef CONFIG_X86_64 - -#undef cpu_has_vme -#define cpu_has_vme 0 - -#undef cpu_has_pae -#define cpu_has_pae ___BUG___ - -#undef cpu_has_k6_mtrr -#define cpu_has_k6_mtrr 0 - -#undef cpu_has_cyrix_arr -#define cpu_has_cyrix_arr 0 - -#undef cpu_has_centaur_mcr -#define cpu_has_centaur_mcr 0 - -#endif /* CONFIG_X86_64 */ - #if __GNUC__ >= 4 extern void warn_pre_alternatives(void); extern bool __static_cpu_has_safe(u16 bit); diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h new file mode 100644 index 000000000000..97534a7d38e3 --- /dev/null +++ b/arch/x86/include/asm/disabled-features.h @@ -0,0 +1,39 @@ +#ifndef _ASM_X86_DISABLED_FEATURES_H +#define _ASM_X86_DISABLED_FEATURES_H + +/* These features, although they might be available in a CPU + * will not be used because the compile options to support + * them are not present. + * + * This code allows them to be checked and disabled at + * compile time without an explicit #ifdef. Use + * cpu_feature_enabled(). + */ + +#ifdef CONFIG_X86_64 +# define DISABLE_VME (1<<(X86_FEATURE_VME & 31)) +# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) +# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) +# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) +#else +# define DISABLE_VME 0 +# define DISABLE_K6_MTRR 0 +# define DISABLE_CYRIX_ARR 0 +# define DISABLE_CENTAUR_MCR 0 +#endif /* CONFIG_X86_64 */ + +/* + * Make sure to add features to the correct mask + */ +#define DISABLED_MASK0 (DISABLE_VME) +#define DISABLED_MASK1 0 +#define DISABLED_MASK2 0 +#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) +#define DISABLED_MASK4 0 +#define DISABLED_MASK5 0 +#define DISABLED_MASK6 0 +#define DISABLED_MASK7 0 +#define DISABLED_MASK8 0 +#define DISABLED_MASK9 0 + +#endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 1a055c81d864..ca3347a9dab5 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -160,8 +160,9 @@ do { \ #define elf_check_arch(x) \ ((x)->e_machine == EM_X86_64) -#define compat_elf_check_arch(x) \ - (elf_check_arch_ia32(x) || (x)->e_machine == EM_X86_64) +#define compat_elf_check_arch(x) \ + (elf_check_arch_ia32(x) || \ + (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64)) #if __USER32_DS != __USER_DS # error "The following code assumes __USER32_DS == __USER_DS" diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 412ececa00b9..e97622f57722 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -344,7 +344,7 @@ static inline void __thread_fpu_end(struct task_struct *tsk) static inline void __thread_fpu_begin(struct task_struct *tsk) { - if (!static_cpu_has_safe(X86_FEATURE_EAGER_FPU)) + if (!use_eager_fpu()) clts(); __thread_set_has_fpu(tsk); } diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h index 9067166409bf..bbe296e0bce1 100644 --- a/arch/x86/include/asm/microcode_intel.h +++ b/arch/x86/include/asm/microcode_intel.h @@ -43,7 +43,7 @@ struct extended_sigtable { #define DWSIZE (sizeof(u32)) #define get_totalsize(mc) \ - (((struct microcode_intel *)mc)->hdr.totalsize ? \ + (((struct microcode_intel *)mc)->hdr.datasize ? \ ((struct microcode_intel *)mc)->hdr.totalsize : \ DEFAULT_UCODE_TOTALSIZE) diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 4064acae625d..01b493e5a99b 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -9,7 +9,6 @@ #ifdef CONFIG_NUMA #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) -#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) /* * Too small node sizes may confuse the VM badly. Usually they diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8249df45d2f2..8dfc9fd094a3 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -51,6 +51,14 @@ ARCH_PERFMON_EVENTSEL_EDGE | \ ARCH_PERFMON_EVENTSEL_INV | \ ARCH_PERFMON_EVENTSEL_CMASK) +#define X86_ALL_EVENT_FLAGS \ + (ARCH_PERFMON_EVENTSEL_EDGE | \ + ARCH_PERFMON_EVENTSEL_INV | \ + ARCH_PERFMON_EVENTSEL_CMASK | \ + ARCH_PERFMON_EVENTSEL_ANY | \ + ARCH_PERFMON_EVENTSEL_PIN_CONTROL | \ + HSW_IN_TX | \ + HSW_IN_TX_CHECKPOINTED) #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 9ee322103c6d..b6c0b404898a 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -32,9 +32,6 @@ static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } void paging_init(void); -extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); - - /* * Define this if things work differently on an i386 and an i486: * it will (on an i486) warn about kernel memory accesses that are diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 3874693c0e53..4572b2f30237 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -116,7 +116,8 @@ static inline void native_pgd_clear(pgd_t *pgd) native_set_pgd(pgd, native_make_pgd(0)); } -extern void sync_global_pgds(unsigned long start, unsigned long end); +extern void sync_global_pgds(unsigned long start, unsigned long end, + int removed); /* * Conversion functions: convert a page and protection to a page entry, diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 0f9724c9c510..07789647bf33 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -23,7 +23,6 @@ #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 #define _PAGE_BIT_SPLITTING _PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */ -#define _PAGE_BIT_IOMAP _PAGE_BIT_SOFTW2 /* flag used to indicate IO mapping */ #define _PAGE_BIT_HIDDEN _PAGE_BIT_SOFTW3 /* hidden by kmemcheck */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ @@ -52,7 +51,7 @@ #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) -#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) +#define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) @@ -168,10 +167,10 @@ #define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) -#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP) -#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP) -#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP) -#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP) +#define __PAGE_KERNEL_IO (__PAGE_KERNEL) +#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE) +#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS) +#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC) #define PAGE_KERNEL __pgprot(__PAGE_KERNEL) #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 6205f0c434db..86fc2bb82287 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -75,6 +75,11 @@ convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code, int si_code); + +extern unsigned long syscall_trace_enter_phase1(struct pt_regs *, u32 arch); +extern long syscall_trace_enter_phase2(struct pt_regs *, u32 arch, + unsigned long phase1_result); + extern long syscall_trace_enter(struct pt_regs *); extern void syscall_trace_leave(struct pt_regs *); diff --git a/arch/x86/include/asm/rwlock.h b/arch/x86/include/asm/rwlock.h deleted file mode 100644 index a5370a03d90c..000000000000 --- a/arch/x86/include/asm/rwlock.h +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef _ASM_X86_RWLOCK_H -#define _ASM_X86_RWLOCK_H - -#include <asm/asm.h> - -#if CONFIG_NR_CPUS <= 2048 - -#ifndef __ASSEMBLY__ -typedef union { - s32 lock; - s32 write; -} arch_rwlock_t; -#endif - -#define RW_LOCK_BIAS 0x00100000 -#define READ_LOCK_SIZE(insn) __ASM_FORM(insn##l) -#define READ_LOCK_ATOMIC(n) atomic_##n -#define WRITE_LOCK_ADD(n) __ASM_FORM_COMMA(addl n) -#define WRITE_LOCK_SUB(n) __ASM_FORM_COMMA(subl n) -#define WRITE_LOCK_CMP RW_LOCK_BIAS - -#else /* CONFIG_NR_CPUS > 2048 */ - -#include <linux/const.h> - -#ifndef __ASSEMBLY__ -typedef union { - s64 lock; - struct { - u32 read; - s32 write; - }; -} arch_rwlock_t; -#endif - -#define RW_LOCK_BIAS (_AC(1,L) << 32) -#define READ_LOCK_SIZE(insn) __ASM_FORM(insn##q) -#define READ_LOCK_ATOMIC(n) atomic64_##n -#define WRITE_LOCK_ADD(n) __ASM_FORM(incl) -#define WRITE_LOCK_SUB(n) __ASM_FORM(decl) -#define WRITE_LOCK_CMP 1 - -#endif /* CONFIG_NR_CPUS */ - -#define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } - -/* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */ - -#endif /* _ASM_X86_RWLOCK_H */ diff --git a/arch/x86/include/asm/serial.h b/arch/x86/include/asm/serial.h index 628c801535ea..460b84f64556 100644 --- a/arch/x86/include/asm/serial.h +++ b/arch/x86/include/asm/serial.h @@ -6,24 +6,24 @@ * * It'd be nice if someone built a serial card with a 24.576 MHz * clock, since the 16550A is capable of handling a top speed of 1.5 - * megabits/second; but this requires the faster clock. + * megabits/second; but this requires a faster clock. */ -#define BASE_BAUD ( 1843200 / 16 ) +#define BASE_BAUD (1843200/16) /* Standard COM flags (except for COM4, because of the 8514 problem) */ #ifdef CONFIG_SERIAL_DETECT_IRQ -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ) -#define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_AUTO_IRQ) +# define STD_COMX_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ) +# define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | 0 | ASYNC_AUTO_IRQ) #else -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST) -#define STD_COM4_FLAGS ASYNC_BOOT_AUTOCONF +# define STD_COMX_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | 0 ) +# define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | 0 | 0 ) #endif -#define SERIAL_PORT_DFNS \ - /* UART CLK PORT IRQ FLAGS */ \ - { 0, BASE_BAUD, 0x3F8, 4, STD_COM_FLAGS }, /* ttyS0 */ \ - { 0, BASE_BAUD, 0x2F8, 3, STD_COM_FLAGS }, /* ttyS1 */ \ - { 0, BASE_BAUD, 0x3E8, 4, STD_COM_FLAGS }, /* ttyS2 */ \ - { 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS }, /* ttyS3 */ +#define SERIAL_PORT_DFNS \ + /* UART CLK PORT IRQ FLAGS */ \ + { .uart = 0, BASE_BAUD, 0x3F8, 4, STD_COMX_FLAGS }, /* ttyS0 */ \ + { .uart = 0, BASE_BAUD, 0x2F8, 3, STD_COMX_FLAGS }, /* ttyS1 */ \ + { .uart = 0, BASE_BAUD, 0x3E8, 4, STD_COMX_FLAGS }, /* ttyS2 */ \ + { .uart = 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS }, /* ttyS3 */ #endif /* _ASM_X86_SERIAL_H */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 54f1c8068c02..9295016485c9 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -187,7 +187,6 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) cpu_relax(); } -#ifndef CONFIG_QUEUE_RWLOCK /* * Read-write spinlocks, allowing multiple readers * but only one writer. @@ -198,91 +197,15 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) * irq-safe write-lock, but readers can get non-irqsafe * read-locks. * - * On x86, we implement read-write locks as a 32-bit counter - * with the high bit (sign) being the "contended" bit. + * On x86, we implement read-write locks using the generic qrwlock with + * x86 specific optimization. */ -/** - * read_can_lock - would read_trylock() succeed? - * @lock: the rwlock in question. - */ -static inline int arch_read_can_lock(arch_rwlock_t *lock) -{ - return lock->lock > 0; -} - -/** - * write_can_lock - would write_trylock() succeed? - * @lock: the rwlock in question. - */ -static inline int arch_write_can_lock(arch_rwlock_t *lock) -{ - return lock->write == WRITE_LOCK_CMP; -} - -static inline void arch_read_lock(arch_rwlock_t *rw) -{ - asm volatile(LOCK_PREFIX READ_LOCK_SIZE(dec) " (%0)\n\t" - "jns 1f\n" - "call __read_lock_failed\n\t" - "1:\n" - ::LOCK_PTR_REG (rw) : "memory"); -} - -static inline void arch_write_lock(arch_rwlock_t *rw) -{ - asm volatile(LOCK_PREFIX WRITE_LOCK_SUB(%1) "(%0)\n\t" - "jz 1f\n" - "call __write_lock_failed\n\t" - "1:\n" - ::LOCK_PTR_REG (&rw->write), "i" (RW_LOCK_BIAS) - : "memory"); -} - -static inline int arch_read_trylock(arch_rwlock_t *lock) -{ - READ_LOCK_ATOMIC(t) *count = (READ_LOCK_ATOMIC(t) *)lock; - - if (READ_LOCK_ATOMIC(dec_return)(count) >= 0) - return 1; - READ_LOCK_ATOMIC(inc)(count); - return 0; -} - -static inline int arch_write_trylock(arch_rwlock_t *lock) -{ - atomic_t *count = (atomic_t *)&lock->write; - - if (atomic_sub_and_test(WRITE_LOCK_CMP, count)) - return 1; - atomic_add(WRITE_LOCK_CMP, count); - return 0; -} - -static inline void arch_read_unlock(arch_rwlock_t *rw) -{ - asm volatile(LOCK_PREFIX READ_LOCK_SIZE(inc) " %0" - :"+m" (rw->lock) : : "memory"); -} - -static inline void arch_write_unlock(arch_rwlock_t *rw) -{ - asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0" - : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory"); -} -#else #include <asm/qrwlock.h> -#endif /* CONFIG_QUEUE_RWLOCK */ #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) -#undef READ_LOCK_SIZE -#undef READ_LOCK_ATOMIC -#undef WRITE_LOCK_ADD -#undef WRITE_LOCK_SUB -#undef WRITE_LOCK_CMP - #define arch_spin_relax(lock) cpu_relax() #define arch_read_relax(lock) cpu_relax() #define arch_write_relax(lock) cpu_relax() diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 73c4c007200f..5f9d7572d82b 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -34,10 +34,6 @@ typedef struct arch_spinlock { #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } -#ifdef CONFIG_QUEUE_RWLOCK #include <asm-generic/qrwlock_types.h> -#else -#include <asm/rwlock.h> -#endif #endif /* _ASM_X86_SPINLOCK_TYPES_H */ diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h index bbae02470701..d993e33f5236 100644 --- a/arch/x86/include/uapi/asm/e820.h +++ b/arch/x86/include/uapi/asm/e820.h @@ -21,11 +21,6 @@ * this size. */ -/* - * Odd: 'make headers_check' complains about numa.h if I try - * to collapse the next two #ifdef lines to a single line: - * #if defined(__KERNEL__) && defined(CONFIG_EFI) - */ #ifndef __KERNEL__ #define E820_X_MAX E820MAX #endif diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index ada2e2d6be3e..8f1e77440b2b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -39,8 +39,6 @@ obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o -obj-$(CONFIG_PREEMPT) += preempt.o - obj-y += process.o obj-y += i387.o xsave.o obj-y += ptrace.o diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index ae915391ebec..4128b5fcb559 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -32,7 +32,7 @@ static int numachip_system __read_mostly; -static const struct apic apic_numachip __read_mostly; +static const struct apic apic_numachip; static unsigned int get_apic_id(unsigned long x) { diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 004f017aa7b9..8e9dcfd630e4 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -204,7 +204,6 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) { -#ifdef CONFIG_SMP unsigned long val; int pnode; @@ -223,7 +222,6 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) uv_write_global_mmr64(pnode, UVH_IPI_INT, val); atomic_set(&init_deasserted, 1); -#endif return 0; } diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 77dcab277710..01d5453b5502 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -39,7 +39,9 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o endif obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o perf_event_intel_rapl.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o perf_event_intel_uncore_snb.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore_snbep.o perf_event_intel_uncore_nhmex.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c649f236e288..700f958652f8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1184,7 +1184,7 @@ void syscall_init(void) /* Flags to clear on syscall */ wrmsrl(MSR_SYSCALL_MASK, X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF| - X86_EFLAGS_IOPL|X86_EFLAGS_AC); + X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); } /* @@ -1266,6 +1266,19 @@ static void dbg_restore_debug_regs(void) #define dbg_restore_debug_regs() #endif /* ! CONFIG_KGDB */ +static void wait_for_master_cpu(int cpu) +{ +#ifdef CONFIG_SMP + /* + * wait for ACK from master CPU before continuing + * with AP initialization + */ + WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)); + while (!cpumask_test_cpu(cpu, cpu_callout_mask)) + cpu_relax(); +#endif +} + /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT @@ -1281,16 +1294,17 @@ void cpu_init(void) struct task_struct *me; struct tss_struct *t; unsigned long v; - int cpu; + int cpu = stack_smp_processor_id(); int i; + wait_for_master_cpu(cpu); + /* * Load microcode on this cpu if a valid microcode is available. * This is early microcode loading procedure. */ load_ucode_ap(); - cpu = stack_smp_processor_id(); t = &per_cpu(init_tss, cpu); oist = &per_cpu(orig_ist, cpu); @@ -1302,9 +1316,6 @@ void cpu_init(void) me = current; - if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) - panic("CPU#%d already initialized!\n", cpu); - pr_debug("Initializing CPU#%d\n", cpu); clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); @@ -1381,17 +1392,13 @@ void cpu_init(void) struct tss_struct *t = &per_cpu(init_tss, cpu); struct thread_struct *thread = &curr->thread; - show_ucode_info_early(); + wait_for_master_cpu(cpu); - if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); - for (;;) - local_irq_enable(); - } + show_ucode_info_early(); printk(KERN_INFO "Initializing CPU#%d\n", cpu); - if (cpu_has_vme || cpu_has_tsc || cpu_has_de) + if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || cpu_has_de) clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); load_current_idt(); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 74e804ddc5c7..1ef456273172 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -144,6 +144,21 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_ERMS); } } + + /* + * Intel Quark Core DevMan_001.pdf section 6.4.11 + * "The operating system also is required to invalidate (i.e., flush) + * the TLB when any changes are made to any of the page table entries. + * The operating system must reload CR3 to cause the TLB to be flushed" + * + * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should + * be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE + * to be modified + */ + if (c->x86 == 5 && c->x86_model == 9) { + pr_info("Disabling PGE capability bit\n"); + setup_clear_cpu_cap(X86_FEATURE_PGE); + } } #ifdef CONFIG_X86_32 @@ -382,6 +397,13 @@ static void init_intel(struct cpuinfo_x86 *c) } l2 = init_intel_cacheinfo(c); + + /* Detect legacy cache sizes if init_intel_cacheinfo did not */ + if (l2 == 0) { + cpu_detect_cache_sizes(c); + l2 = c->x86_cache_size; + } + if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); /* Check for version and the number of counters */ @@ -485,6 +507,13 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) */ if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) size = 256; + + /* + * Intel Quark SoC X1000 contains a 4-way set associative + * 16K cache with a 16 byte cache line and 256 lines per tag + */ + if ((c->x86 == 5) && (c->x86_model == 9)) + size = 16; return size; } #endif @@ -686,7 +715,8 @@ static const struct cpu_dev intel_cpu_dev = { [3] = "OverDrive PODP5V83", [4] = "Pentium MMX", [7] = "Mobile Pentium 75 - 200", - [8] = "Mobile Pentium MMX" + [8] = "Mobile Pentium MMX", + [9] = "Quark SoC X1000", } }, { .family = 6, .model_names = diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 36a1bb6d1ee0..1af51b1586d7 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -498,8 +498,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c) if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { - printk(KERN_DEBUG - "CPU%d: Thermal monitoring handled by SMI\n", cpu); + if (system_state == SYSTEM_BOOTING) + printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); return; } diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c index 617a9e284245..7aa1acc79789 100644 --- a/arch/x86/kernel/cpu/microcode/amd_early.c +++ b/arch/x86/kernel/cpu/microcode/amd_early.c @@ -27,7 +27,7 @@ static u32 ucode_new_rev; u8 amd_ucode_patch[PATCH_MAX_SIZE]; static u16 this_equiv_id; -struct cpio_data ucode_cpio; +static struct cpio_data ucode_cpio; /* * Microcode patch container file is prepended to the initrd in cpio format. diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index a276fa75d9b5..c6826d1e8082 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -127,7 +127,7 @@ static int get_matching_mc(struct microcode_intel *mc_intel, int cpu) return get_matching_microcode(csig, cpf, mc_intel, crev); } -int apply_microcode(int cpu) +static int apply_microcode_intel(int cpu) { struct microcode_intel *mc_intel; struct ucode_cpu_info *uci; @@ -314,7 +314,7 @@ static struct microcode_ops microcode_intel_ops = { .request_microcode_user = request_microcode_user, .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info, - .apply_microcode = apply_microcode, + .apply_microcode = apply_microcode_intel, .microcode_fini_cpu = microcode_fini_cpu, }; diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c index 18f739129e72..b88343f7a3b3 100644 --- a/arch/x86/kernel/cpu/microcode/intel_early.c +++ b/arch/x86/kernel/cpu/microcode/intel_early.c @@ -28,8 +28,8 @@ #include <asm/tlbflush.h> #include <asm/setup.h> -unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; -struct mc_saved_data { +static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; +static struct mc_saved_data { unsigned int mc_saved_count; struct microcode_intel **mc_saved; } mc_saved_data; @@ -415,7 +415,7 @@ static void __ref show_saved_mc(void) struct ucode_cpu_info uci; if (mc_saved_data.mc_saved_count == 0) { - pr_debug("no micorcode data saved.\n"); + pr_debug("no microcode data saved.\n"); return; } pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count); @@ -506,7 +506,7 @@ int save_mc_for_early(u8 *mc) if (mc_saved && mc_saved_count) memcpy(mc_saved_tmp, mc_saved, - mc_saved_count * sizeof(struct mirocode_intel *)); + mc_saved_count * sizeof(struct microcode_intel *)); /* * Save the microcode patch mc in mc_save_tmp structure if it's a newer * version. @@ -526,7 +526,7 @@ int save_mc_for_early(u8 *mc) show_saved_mc(); /* - * Free old saved microcod data. + * Free old saved microcode data. */ if (mc_saved) { for (i = 0; i < mc_saved_count_init; i++) diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index f961de9964c7..ea5f363a1948 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -707,7 +707,7 @@ void __init mtrr_bp_init(void) } else { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: - if (cpu_has_k6_mtrr) { + if (cpu_feature_enabled(X86_FEATURE_K6_MTRR)) { /* Pre-Athlon (K6) AMD CPU MTRRs */ mtrr_if = mtrr_ops[X86_VENDOR_AMD]; size_or_mask = SIZE_OR_MASK_BITS(32); @@ -715,14 +715,14 @@ void __init mtrr_bp_init(void) } break; case X86_VENDOR_CENTAUR: - if (cpu_has_centaur_mcr) { + if (cpu_feature_enabled(X86_FEATURE_CENTAUR_MCR)) { mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR]; size_or_mask = SIZE_OR_MASK_BITS(32); size_and_mask = 0; } break; case X86_VENDOR_CYRIX: - if (cpu_has_cyrix_arr) { + if (cpu_feature_enabled(X86_FEATURE_CYRIX_ARR)) { mtrr_if = mtrr_ops[X86_VENDOR_CYRIX]; size_or_mask = SIZE_OR_MASK_BITS(32); size_and_mask = 0; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2879ecdaac43..16c73022306e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -243,7 +243,8 @@ static bool check_hw_exists(void) msr_fail: printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); - printk(KERN_ERR "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new); + printk(boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR + "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new); return false; } @@ -387,7 +388,7 @@ int x86_pmu_hw_config(struct perf_event *event) precise++; /* Support for IP fixup */ - if (x86_pmu.lbr_nr) + if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2) precise++; } @@ -443,6 +444,12 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.type == PERF_TYPE_RAW) event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; + if (event->attr.sample_period && x86_pmu.limit_period) { + if (x86_pmu.limit_period(event, event->attr.sample_period) > + event->attr.sample_period) + return -EINVAL; + } + return x86_setup_perfctr(event); } @@ -980,6 +987,9 @@ int x86_perf_event_set_period(struct perf_event *event) if (left > x86_pmu.max_period) left = x86_pmu.max_period; + if (x86_pmu.limit_period) + left = x86_pmu.limit_period(event, left); + per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; /* diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 8ade93111e03..d98a34d435d7 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -67,8 +67,10 @@ struct event_constraint { */ #define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */ #define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */ -#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style st data sampling */ +#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style datala, store */ #define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */ +#define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */ +#define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */ struct amd_nb { int nb_id; /* NorthBridge id */ @@ -252,18 +254,52 @@ struct cpu_hw_events { EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) #define INTEL_PLD_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) #define INTEL_PST_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) -/* DataLA version of store sampling without extra enable bit. */ -#define INTEL_PST_HSW_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ +/* Event constraint, but match on all event flags too. */ +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) + +/* Check only flags, but allow all event/umask */ +#define INTEL_ALL_EVENT_CONSTRAINT(code, n) \ + EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS) + +/* Check flags and event code, and set the HSW store flag */ +#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) + +/* Check flags and event code, and set the HSW load flag */ +#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) + +/* Check flags and event code/umask, and set the HSW store flag */ +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) +/* Check flags and event code/umask, and set the HSW load flag */ +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) + +/* Check flags and event code/umask, and set the HSW N/A flag */ +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + INTEL_ARCH_EVENT_MASK|INTEL_ARCH_EVENT_MASK, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW) + + /* * We define the end marker as having a weight of -1 * to enable blacklisting of events using a counter bitmask @@ -409,6 +445,7 @@ struct x86_pmu { struct x86_pmu_quirk *quirks; int perfctr_second_write; bool late_ack; + unsigned (*limit_period)(struct perf_event *event, unsigned l); /* * sysfs attrs diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 2502d0d9d246..3851def5057c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -220,6 +220,15 @@ static struct event_constraint intel_hsw_event_constraints[] = { EVENT_CONSTRAINT_END }; +static struct event_constraint intel_bdw_event_constraints[] = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ + INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */ + EVENT_CONSTRAINT_END +}; + static u64 intel_pmu_event_map(int hw_event) { return intel_perfmon_event_map[hw_event]; @@ -415,6 +424,126 @@ static __initconst const u64 snb_hw_cache_event_ids }; +static __initconst const u64 hsw_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ + [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ + [ C(RESULT_MISS) ] = 0x0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */ + [ C(RESULT_ACCESS) ] = 0x1b7, + /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE| + L3_MISS|ANY_SNOOP */ + [ C(RESULT_MISS) ] = 0x1b7, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE:ALL_RFO */ + /* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */ + [ C(RESULT_MISS) ] = 0x1b7, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ + [ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ + [ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */ + [ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static __initconst const u64 hsw_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */ + [ C(RESULT_ACCESS) ] = 0x2d5, + /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE| + L3_MISS|ANY_SNOOP */ + [ C(RESULT_MISS) ] = 0x3fbc0202d5ull, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x122, /* OFFCORE_RESPONSE:ALL_RFO */ + /* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */ + [ C(RESULT_MISS) ] = 0x3fbc020122ull, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, +}; + static __initconst const u64 westmere_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -1905,6 +2034,24 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) return c; } +/* + * Broadwell: + * The INST_RETIRED.ALL period always needs to have lowest + * 6bits cleared (BDM57). It shall not use a period smaller + * than 100 (BDM11). We combine the two to enforce + * a min-period of 128. + */ +static unsigned bdw_limit_period(struct perf_event *event, unsigned left) +{ + if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == + X86_CONFIG(.event=0xc0, .umask=0x01)) { + if (left < 128) + left = 128; + left &= ~0x3fu; + } + return left; +} + PMU_FORMAT_ATTR(event, "config:0-7" ); PMU_FORMAT_ATTR(umask, "config:8-15" ); PMU_FORMAT_ATTR(edge, "config:18" ); @@ -2367,15 +2514,15 @@ __init int intel_pmu_init(void) * Install the hw-cache-events table: */ switch (boot_cpu_data.x86_model) { - case 14: /* 65 nm core solo/duo, "Yonah" */ + case 14: /* 65nm Core "Yonah" */ pr_cont("Core events, "); break; - case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ + case 15: /* 65nm Core2 "Merom" */ x86_add_quirk(intel_clovertown_quirk); - case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ - case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ - case 29: /* six-core 45 nm xeon "Dunnington" */ + case 22: /* 65nm Core2 "Merom-L" */ + case 23: /* 45nm Core2 "Penryn" */ + case 29: /* 45nm Core2 "Dunnington (MP) */ memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -2386,9 +2533,9 @@ __init int intel_pmu_init(void) pr_cont("Core2 events, "); break; - case 26: /* 45 nm nehalem, "Bloomfield" */ - case 30: /* 45 nm nehalem, "Lynnfield" */ - case 46: /* 45 nm nehalem-ex, "Beckton" */ + case 30: /* 45nm Nehalem */ + case 26: /* 45nm Nehalem-EP */ + case 46: /* 45nm Nehalem-EX */ memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -2415,11 +2562,11 @@ __init int intel_pmu_init(void) pr_cont("Nehalem events, "); break; - case 28: /* Atom */ - case 38: /* Lincroft */ - case 39: /* Penwell */ - case 53: /* Cloverview */ - case 54: /* Cedarview */ + case 28: /* 45nm Atom "Pineview" */ + case 38: /* 45nm Atom "Lincroft" */ + case 39: /* 32nm Atom "Penwell" */ + case 53: /* 32nm Atom "Cloverview" */ + case 54: /* 32nm Atom "Cedarview" */ memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -2430,8 +2577,8 @@ __init int intel_pmu_init(void) pr_cont("Atom events, "); break; - case 55: /* Atom 22nm "Silvermont" */ - case 77: /* Avoton "Silvermont" */ + case 55: /* 22nm Atom "Silvermont" */ + case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, @@ -2446,9 +2593,9 @@ __init int intel_pmu_init(void) pr_cont("Silvermont events, "); break; - case 37: /* 32 nm nehalem, "Clarkdale" */ - case 44: /* 32 nm nehalem, "Gulftown" */ - case 47: /* 32 nm Xeon E7 */ + case 37: /* 32nm Westmere */ + case 44: /* 32nm Westmere-EP */ + case 47: /* 32nm Westmere-EX */ memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -2474,8 +2621,8 @@ __init int intel_pmu_init(void) pr_cont("Westmere events, "); break; - case 42: /* SandyBridge */ - case 45: /* SandyBridge, "Romely-EP" */ + case 42: /* 32nm SandyBridge */ + case 45: /* 32nm SandyBridge-E/EN/EP */ x86_add_quirk(intel_sandybridge_quirk); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -2506,8 +2653,9 @@ __init int intel_pmu_init(void) pr_cont("SandyBridge events, "); break; - case 58: /* IvyBridge */ - case 62: /* IvyBridge EP */ + + case 58: /* 22nm IvyBridge */ + case 62: /* 22nm IvyBridge-EP/EX */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); /* dTLB-load-misses on IVB is different than SNB */ @@ -2539,20 +2687,19 @@ __init int intel_pmu_init(void) break; - case 60: /* Haswell Client */ - case 70: - case 71: - case 63: - case 69: + case 60: /* 22nm Haswell Core */ + case 63: /* 22nm Haswell Server */ + case 69: /* 22nm Haswell ULT */ + case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ x86_pmu.late_ack = true; - memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_snb(); x86_pmu.event_constraints = intel_hsw_event_constraints; x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; - x86_pmu.extra_regs = intel_snb_extra_regs; + x86_pmu.extra_regs = intel_snbep_extra_regs; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; /* all extra regs are per-cpu when HT is on */ x86_pmu.er_flags |= ERF_HAS_RSP_1; @@ -2565,6 +2712,28 @@ __init int intel_pmu_init(void) pr_cont("Haswell events, "); break; + case 61: /* 14nm Broadwell Core-M */ + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_snb(); + + x86_pmu.event_constraints = intel_bdw_event_constraints; + x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; + x86_pmu.extra_regs = intel_snbep_extra_regs; + x86_pmu.pebs_aliases = intel_pebs_aliases_snb; + /* all extra regs are per-cpu when HT is on */ + x86_pmu.er_flags |= ERF_HAS_RSP_1; + x86_pmu.er_flags |= ERF_NO_HT_SHARING; + + x86_pmu.hw_config = hsw_hw_config; + x86_pmu.get_event_constraints = hsw_get_event_constraints; + x86_pmu.cpu_events = hsw_events_attrs; + x86_pmu.limit_period = bdw_limit_period; + pr_cont("Broadwell events, "); + break; + default: switch (x86_pmu.version) { case 1: diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 696ade311ded..b1553d05a5cb 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -108,14 +108,16 @@ static u64 precise_store_data(u64 status) return val; } -static u64 precise_store_data_hsw(struct perf_event *event, u64 status) +static u64 precise_datala_hsw(struct perf_event *event, u64 status) { union perf_mem_data_src dse; - u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK; - dse.val = 0; - dse.mem_op = PERF_MEM_OP_STORE; - dse.mem_lvl = PERF_MEM_LVL_NA; + dse.val = PERF_MEM_NA; + + if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) + dse.mem_op = PERF_MEM_OP_STORE; + else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW) + dse.mem_op = PERF_MEM_OP_LOAD; /* * L1 info only valid for following events: @@ -125,15 +127,12 @@ static u64 precise_store_data_hsw(struct perf_event *event, u64 status) * MEM_UOPS_RETIRED.SPLIT_STORES * MEM_UOPS_RETIRED.ALL_STORES */ - if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0) - return dse.mem_lvl; - - if (status & 1) - dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; - else - dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS; - - /* Nothing else supported. Sorry. */ + if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) { + if (status & 1) + dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + else + dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS; + } return dse.val; } @@ -569,28 +568,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { }; struct event_constraint intel_slm_pebs_event_constraints[] = { - INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */ - INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */ - INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */ - INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */ - INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */ - INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */ - INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */ - INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */ - INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */ - INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */ - INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */ - INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */ - INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */ - INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */ - INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */ - INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */ - INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */ - INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */ - INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */ + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), EVENT_CONSTRAINT_END }; @@ -626,68 +607,44 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = { struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ - INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ - INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ - INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), EVENT_CONSTRAINT_END }; struct event_constraint intel_ivb_pebs_event_constraints[] = { INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ - INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ - INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ - INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), EVENT_CONSTRAINT_END }; struct event_constraint intel_hsw_pebs_event_constraints[] = { INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ - INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ - INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */ - INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */ - INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */ - INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */ - /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), - /* MEM_UOPS_RETIRED.STLB_MISS_STORES */ - INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), - INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */ - /* MEM_UOPS_RETIRED.SPLIT_STORES */ - INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), - INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */ - INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */ - INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */ - INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */ - INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */ - /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */ - INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf), - /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */ - INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf), - /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */ - INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf), - /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */ - INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf), - INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */ - INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */ - + INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */ + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */ + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), EVENT_CONSTRAINT_END }; @@ -864,6 +821,10 @@ static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs) static void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, void *__pebs) { +#define PERF_X86_EVENT_PEBS_HSW_PREC \ + (PERF_X86_EVENT_PEBS_ST_HSW | \ + PERF_X86_EVENT_PEBS_LD_HSW | \ + PERF_X86_EVENT_PEBS_NA_HSW) /* * We cast to the biggest pebs_record but are careful not to * unconditionally access the 'extra' entries. @@ -873,42 +834,40 @@ static void __intel_pmu_pebs_event(struct perf_event *event, struct perf_sample_data data; struct pt_regs regs; u64 sample_type; - int fll, fst; + int fll, fst, dsrc; + int fl = event->hw.flags; if (!intel_pmu_save_and_restart(event)) return; - fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; - fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST | - PERF_X86_EVENT_PEBS_ST_HSW); + sample_type = event->attr.sample_type; + dsrc = sample_type & PERF_SAMPLE_DATA_SRC; + + fll = fl & PERF_X86_EVENT_PEBS_LDLAT; + fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC); perf_sample_data_init(&data, 0, event->hw.last_period); data.period = event->hw.last_period; - sample_type = event->attr.sample_type; /* - * if PEBS-LL or PreciseStore + * Use latency for weight (only avail with PEBS-LL) */ - if (fll || fst) { - /* - * Use latency for weight (only avail with PEBS-LL) - */ - if (fll && (sample_type & PERF_SAMPLE_WEIGHT)) - data.weight = pebs->lat; - - /* - * data.data_src encodes the data source - */ - if (sample_type & PERF_SAMPLE_DATA_SRC) { - if (fll) - data.data_src.val = load_latency_data(pebs->dse); - else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) - data.data_src.val = - precise_store_data_hsw(event, pebs->dse); - else - data.data_src.val = precise_store_data(pebs->dse); - } + if (fll && (sample_type & PERF_SAMPLE_WEIGHT)) + data.weight = pebs->lat; + + /* + * data.data_src encodes the data source + */ + if (dsrc) { + u64 val = PERF_MEM_NA; + if (fll) + val = load_latency_data(pebs->dse); + else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) + val = precise_datala_hsw(event, pebs->dse); + else if (fst) + val = precise_store_data(pebs->dse); + data.data_src.val = val; } /* @@ -935,16 +894,16 @@ static void __intel_pmu_pebs_event(struct perf_event *event, else regs.flags &= ~PERF_EFLAGS_EXACT; - if ((event->attr.sample_type & PERF_SAMPLE_ADDR) && + if ((sample_type & PERF_SAMPLE_ADDR) && x86_pmu.intel_cap.pebs_format >= 1) data.addr = pebs->dla; if (x86_pmu.intel_cap.pebs_format >= 2) { /* Only set the TSX weight when no memory weight. */ - if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll) + if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll) data.weight = intel_hsw_weight(pebs); - if (event->attr.sample_type & PERF_SAMPLE_TRANSACTION) + if (sample_type & PERF_SAMPLE_TRANSACTION) data.txn = intel_hsw_transaction(pebs); } @@ -1055,7 +1014,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) * BTS, PEBS probe and setup */ -void intel_ds_init(void) +void __init intel_ds_init(void) { /* * No support for 32bit formats diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 9dd2459a4c73..4af10617de33 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -697,7 +697,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { }; /* core */ -void intel_pmu_lbr_init_core(void) +void __init intel_pmu_lbr_init_core(void) { x86_pmu.lbr_nr = 4; x86_pmu.lbr_tos = MSR_LBR_TOS; @@ -712,7 +712,7 @@ void intel_pmu_lbr_init_core(void) } /* nehalem/westmere */ -void intel_pmu_lbr_init_nhm(void) +void __init intel_pmu_lbr_init_nhm(void) { x86_pmu.lbr_nr = 16; x86_pmu.lbr_tos = MSR_LBR_TOS; @@ -733,7 +733,7 @@ void intel_pmu_lbr_init_nhm(void) } /* sandy bridge */ -void intel_pmu_lbr_init_snb(void) +void __init intel_pmu_lbr_init_snb(void) { x86_pmu.lbr_nr = 16; x86_pmu.lbr_tos = MSR_LBR_TOS; @@ -753,7 +753,7 @@ void intel_pmu_lbr_init_snb(void) } /* atom */ -void intel_pmu_lbr_init_atom(void) +void __init intel_pmu_lbr_init_atom(void) { /* * only models starting at stepping 10 seems diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 0939f86f543d..9762dbd9f3f7 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -1,83 +1,39 @@ #include "perf_event_intel_uncore.h" static struct intel_uncore_type *empty_uncore[] = { NULL, }; -static struct intel_uncore_type **msr_uncores = empty_uncore; -static struct intel_uncore_type **pci_uncores = empty_uncore; -/* pci bus to socket mapping */ -static int pcibus_to_physid[256] = { [0 ... 255] = -1, }; +struct intel_uncore_type **uncore_msr_uncores = empty_uncore; +struct intel_uncore_type **uncore_pci_uncores = empty_uncore; -static struct pci_dev *extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; +static bool pcidrv_registered; +struct pci_driver *uncore_pci_driver; +/* pci bus to socket mapping */ +int uncore_pcibus_to_physid[256] = { [0 ... 255] = -1, }; +struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; static DEFINE_RAW_SPINLOCK(uncore_box_lock); - /* mask of cpus that collect uncore events */ static cpumask_t uncore_cpu_mask; /* constraint for the fixed counter */ -static struct event_constraint constraint_fixed = +static struct event_constraint uncore_constraint_fixed = EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL); -static struct event_constraint constraint_empty = +struct event_constraint uncore_constraint_empty = EVENT_CONSTRAINT(0, 0, 0); -#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \ - ((1ULL << (n)) - 1))) - -DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); -DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); -DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); -DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); -DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); -DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); -DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); -DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31"); -DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); -DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28"); -DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15"); -DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); -DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); -DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); -DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8"); -DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); -DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47"); -DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); -DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22"); -DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31"); -DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60"); -DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7"); -DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15"); -DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23"); -DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31"); -DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51"); -DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35"); -DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31"); -DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17"); -DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12"); -DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8"); -DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4"); -DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31"); -DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63"); -DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51"); -DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35"); -DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31"); -DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17"); -DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12"); -DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8"); -DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4"); -DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31"); -DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63"); - -static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); -static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); -static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event); -static void uncore_pmu_event_read(struct perf_event *event); - -static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) +ssize_t uncore_event_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct uncore_event_desc *event = + container_of(attr, struct uncore_event_desc, attr); + return sprintf(buf, "%s", event->config); +} + +struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) { return container_of(event->pmu, struct intel_uncore_pmu, pmu); } -static struct intel_uncore_box * -uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) +struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) { struct intel_uncore_box *box; @@ -86,6 +42,9 @@ uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) return box; raw_spin_lock(&uncore_box_lock); + /* Recheck in lock to handle races. */ + if (*per_cpu_ptr(pmu->box, cpu)) + goto out; list_for_each_entry(box, &pmu->box_list, list) { if (box->phys_id == topology_physical_package_id(cpu)) { atomic_inc(&box->refcnt); @@ -93,12 +52,13 @@ uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) break; } } +out: raw_spin_unlock(&uncore_box_lock); return *per_cpu_ptr(pmu->box, cpu); } -static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) +struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) { /* * perf core schedules event on the basis of cpu, uncore events are @@ -107,7 +67,7 @@ static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); } -static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) +u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) { u64 count; @@ -119,7 +79,7 @@ static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_eve /* * generic get constraint function for shared match/mask registers. */ -static struct event_constraint * +struct event_constraint * uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event) { struct intel_uncore_extra_reg *er; @@ -154,10 +114,10 @@ uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event) return NULL; } - return &constraint_empty; + return &uncore_constraint_empty; } -static void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event) { struct intel_uncore_extra_reg *er; struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; @@ -178,7 +138,7 @@ static void uncore_put_constraint(struct intel_uncore_box *box, struct perf_even reg1->alloc = 0; } -static u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx) +u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx) { struct intel_uncore_extra_reg *er; unsigned long flags; @@ -193,2936 +153,6 @@ static u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx) return config; } -/* Sandy Bridge-EP uncore support */ -static struct intel_uncore_type snbep_uncore_cbox; -static struct intel_uncore_type snbep_uncore_pcu; - -static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box) -{ - struct pci_dev *pdev = box->pci_dev; - int box_ctl = uncore_pci_box_ctl(box); - u32 config = 0; - - if (!pci_read_config_dword(pdev, box_ctl, &config)) { - config |= SNBEP_PMON_BOX_CTL_FRZ; - pci_write_config_dword(pdev, box_ctl, config); - } -} - -static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box) -{ - struct pci_dev *pdev = box->pci_dev; - int box_ctl = uncore_pci_box_ctl(box); - u32 config = 0; - - if (!pci_read_config_dword(pdev, box_ctl, &config)) { - config &= ~SNBEP_PMON_BOX_CTL_FRZ; - pci_write_config_dword(pdev, box_ctl, config); - } -} - -static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct pci_dev *pdev = box->pci_dev; - struct hw_perf_event *hwc = &event->hw; - - pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); -} - -static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct pci_dev *pdev = box->pci_dev; - struct hw_perf_event *hwc = &event->hw; - - pci_write_config_dword(pdev, hwc->config_base, hwc->config); -} - -static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event) -{ - struct pci_dev *pdev = box->pci_dev; - struct hw_perf_event *hwc = &event->hw; - u64 count = 0; - - pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); - pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); - - return count; -} - -static void snbep_uncore_pci_init_box(struct intel_uncore_box *box) -{ - struct pci_dev *pdev = box->pci_dev; - - pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, SNBEP_PMON_BOX_CTL_INT); -} - -static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box) -{ - u64 config; - unsigned msr; - - msr = uncore_msr_box_ctl(box); - if (msr) { - rdmsrl(msr, config); - config |= SNBEP_PMON_BOX_CTL_FRZ; - wrmsrl(msr, config); - } -} - -static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box) -{ - u64 config; - unsigned msr; - - msr = uncore_msr_box_ctl(box); - if (msr) { - rdmsrl(msr, config); - config &= ~SNBEP_PMON_BOX_CTL_FRZ; - wrmsrl(msr, config); - } -} - -static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - - if (reg1->idx != EXTRA_REG_NONE) - wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0)); - - wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); -} - -static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box, - struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - wrmsrl(hwc->config_base, hwc->config); -} - -static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) -{ - unsigned msr = uncore_msr_box_ctl(box); - - if (msr) - wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT); -} - -static struct attribute *snbep_uncore_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_thresh8.attr, - NULL, -}; - -static struct attribute *snbep_uncore_ubox_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_thresh5.attr, - NULL, -}; - -static struct attribute *snbep_uncore_cbox_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_tid_en.attr, - &format_attr_inv.attr, - &format_attr_thresh8.attr, - &format_attr_filter_tid.attr, - &format_attr_filter_nid.attr, - &format_attr_filter_state.attr, - &format_attr_filter_opc.attr, - NULL, -}; - -static struct attribute *snbep_uncore_pcu_formats_attr[] = { - &format_attr_event_ext.attr, - &format_attr_occ_sel.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_thresh5.attr, - &format_attr_occ_invert.attr, - &format_attr_occ_edge.attr, - &format_attr_filter_band0.attr, - &format_attr_filter_band1.attr, - &format_attr_filter_band2.attr, - &format_attr_filter_band3.attr, - NULL, -}; - -static struct attribute *snbep_uncore_qpi_formats_attr[] = { - &format_attr_event_ext.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_thresh8.attr, - &format_attr_match_rds.attr, - &format_attr_match_rnid30.attr, - &format_attr_match_rnid4.attr, - &format_attr_match_dnid.attr, - &format_attr_match_mc.attr, - &format_attr_match_opc.attr, - &format_attr_match_vnw.attr, - &format_attr_match0.attr, - &format_attr_match1.attr, - &format_attr_mask_rds.attr, - &format_attr_mask_rnid30.attr, - &format_attr_mask_rnid4.attr, - &format_attr_mask_dnid.attr, - &format_attr_mask_mc.attr, - &format_attr_mask_opc.attr, - &format_attr_mask_vnw.attr, - &format_attr_mask0.attr, - &format_attr_mask1.attr, - NULL, -}; - -static struct uncore_event_desc snbep_uncore_imc_events[] = { - INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), - INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"), - INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"), - { /* end: all zeroes */ }, -}; - -static struct uncore_event_desc snbep_uncore_qpi_events[] = { - INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"), - INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"), - INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x102,umask=0x08"), - INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x103,umask=0x04"), - { /* end: all zeroes */ }, -}; - -static struct attribute_group snbep_uncore_format_group = { - .name = "format", - .attrs = snbep_uncore_formats_attr, -}; - -static struct attribute_group snbep_uncore_ubox_format_group = { - .name = "format", - .attrs = snbep_uncore_ubox_formats_attr, -}; - -static struct attribute_group snbep_uncore_cbox_format_group = { - .name = "format", - .attrs = snbep_uncore_cbox_formats_attr, -}; - -static struct attribute_group snbep_uncore_pcu_format_group = { - .name = "format", - .attrs = snbep_uncore_pcu_formats_attr, -}; - -static struct attribute_group snbep_uncore_qpi_format_group = { - .name = "format", - .attrs = snbep_uncore_qpi_formats_attr, -}; - -#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \ - .init_box = snbep_uncore_msr_init_box, \ - .disable_box = snbep_uncore_msr_disable_box, \ - .enable_box = snbep_uncore_msr_enable_box, \ - .disable_event = snbep_uncore_msr_disable_event, \ - .enable_event = snbep_uncore_msr_enable_event, \ - .read_counter = uncore_msr_read_counter - -static struct intel_uncore_ops snbep_uncore_msr_ops = { - SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), -}; - -#define SNBEP_UNCORE_PCI_OPS_COMMON_INIT() \ - .init_box = snbep_uncore_pci_init_box, \ - .disable_box = snbep_uncore_pci_disable_box, \ - .enable_box = snbep_uncore_pci_enable_box, \ - .disable_event = snbep_uncore_pci_disable_event, \ - .read_counter = snbep_uncore_pci_read_counter - -static struct intel_uncore_ops snbep_uncore_pci_ops = { - SNBEP_UNCORE_PCI_OPS_COMMON_INIT(), - .enable_event = snbep_uncore_pci_enable_event, \ -}; - -static struct event_constraint snbep_uncore_cbox_constraints[] = { - UNCORE_EVENT_CONSTRAINT(0x01, 0x1), - UNCORE_EVENT_CONSTRAINT(0x02, 0x3), - UNCORE_EVENT_CONSTRAINT(0x04, 0x3), - UNCORE_EVENT_CONSTRAINT(0x05, 0x3), - UNCORE_EVENT_CONSTRAINT(0x07, 0x3), - UNCORE_EVENT_CONSTRAINT(0x09, 0x3), - UNCORE_EVENT_CONSTRAINT(0x11, 0x1), - UNCORE_EVENT_CONSTRAINT(0x12, 0x3), - UNCORE_EVENT_CONSTRAINT(0x13, 0x3), - UNCORE_EVENT_CONSTRAINT(0x1b, 0xc), - UNCORE_EVENT_CONSTRAINT(0x1c, 0xc), - UNCORE_EVENT_CONSTRAINT(0x1d, 0xc), - UNCORE_EVENT_CONSTRAINT(0x1e, 0xc), - EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff), - UNCORE_EVENT_CONSTRAINT(0x21, 0x3), - UNCORE_EVENT_CONSTRAINT(0x23, 0x3), - UNCORE_EVENT_CONSTRAINT(0x31, 0x3), - UNCORE_EVENT_CONSTRAINT(0x32, 0x3), - UNCORE_EVENT_CONSTRAINT(0x33, 0x3), - UNCORE_EVENT_CONSTRAINT(0x34, 0x3), - UNCORE_EVENT_CONSTRAINT(0x35, 0x3), - UNCORE_EVENT_CONSTRAINT(0x36, 0x1), - UNCORE_EVENT_CONSTRAINT(0x37, 0x3), - UNCORE_EVENT_CONSTRAINT(0x38, 0x3), - UNCORE_EVENT_CONSTRAINT(0x39, 0x3), - UNCORE_EVENT_CONSTRAINT(0x3b, 0x1), - EVENT_CONSTRAINT_END -}; - -static struct event_constraint snbep_uncore_r2pcie_constraints[] = { - UNCORE_EVENT_CONSTRAINT(0x10, 0x3), - UNCORE_EVENT_CONSTRAINT(0x11, 0x3), - UNCORE_EVENT_CONSTRAINT(0x12, 0x1), - UNCORE_EVENT_CONSTRAINT(0x23, 0x3), - UNCORE_EVENT_CONSTRAINT(0x24, 0x3), - UNCORE_EVENT_CONSTRAINT(0x25, 0x3), - UNCORE_EVENT_CONSTRAINT(0x26, 0x3), - UNCORE_EVENT_CONSTRAINT(0x32, 0x3), - UNCORE_EVENT_CONSTRAINT(0x33, 0x3), - UNCORE_EVENT_CONSTRAINT(0x34, 0x3), - EVENT_CONSTRAINT_END -}; - -static struct event_constraint snbep_uncore_r3qpi_constraints[] = { - UNCORE_EVENT_CONSTRAINT(0x10, 0x3), - UNCORE_EVENT_CONSTRAINT(0x11, 0x3), - UNCORE_EVENT_CONSTRAINT(0x12, 0x3), - UNCORE_EVENT_CONSTRAINT(0x13, 0x1), - UNCORE_EVENT_CONSTRAINT(0x20, 0x3), - UNCORE_EVENT_CONSTRAINT(0x21, 0x3), - UNCORE_EVENT_CONSTRAINT(0x22, 0x3), - UNCORE_EVENT_CONSTRAINT(0x23, 0x3), - UNCORE_EVENT_CONSTRAINT(0x24, 0x3), - UNCORE_EVENT_CONSTRAINT(0x25, 0x3), - UNCORE_EVENT_CONSTRAINT(0x26, 0x3), - UNCORE_EVENT_CONSTRAINT(0x28, 0x3), - UNCORE_EVENT_CONSTRAINT(0x29, 0x3), - UNCORE_EVENT_CONSTRAINT(0x2a, 0x3), - UNCORE_EVENT_CONSTRAINT(0x2b, 0x3), - UNCORE_EVENT_CONSTRAINT(0x2c, 0x3), - UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), - UNCORE_EVENT_CONSTRAINT(0x2e, 0x3), - UNCORE_EVENT_CONSTRAINT(0x2f, 0x3), - UNCORE_EVENT_CONSTRAINT(0x30, 0x3), - UNCORE_EVENT_CONSTRAINT(0x31, 0x3), - UNCORE_EVENT_CONSTRAINT(0x32, 0x3), - UNCORE_EVENT_CONSTRAINT(0x33, 0x3), - UNCORE_EVENT_CONSTRAINT(0x34, 0x3), - UNCORE_EVENT_CONSTRAINT(0x36, 0x3), - UNCORE_EVENT_CONSTRAINT(0x37, 0x3), - UNCORE_EVENT_CONSTRAINT(0x38, 0x3), - UNCORE_EVENT_CONSTRAINT(0x39, 0x3), - EVENT_CONSTRAINT_END -}; - -static struct intel_uncore_type snbep_uncore_ubox = { - .name = "ubox", - .num_counters = 2, - .num_boxes = 1, - .perf_ctr_bits = 44, - .fixed_ctr_bits = 48, - .perf_ctr = SNBEP_U_MSR_PMON_CTR0, - .event_ctl = SNBEP_U_MSR_PMON_CTL0, - .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK, - .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR, - .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL, - .ops = &snbep_uncore_msr_ops, - .format_group = &snbep_uncore_ubox_format_group, -}; - -static struct extra_reg snbep_uncore_cbox_extra_regs[] = { - SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, - SNBEP_CBO_PMON_CTL_TID_EN, 0x1), - SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6), - SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6), - SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0x6), - SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6), - SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa), - SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa), - SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2), - SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2), - SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2), - SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2), - SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa), - SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa), - SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2), - SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2), - SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2), - SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x2), - EVENT_EXTRA_END -}; - -static void snbep_cbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct intel_uncore_extra_reg *er = &box->shared_regs[0]; - int i; - - if (uncore_box_is_fake(box)) - return; - - for (i = 0; i < 5; i++) { - if (reg1->alloc & (0x1 << i)) - atomic_sub(1 << (i * 6), &er->ref); - } - reg1->alloc = 0; -} - -static struct event_constraint * -__snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event, - u64 (*cbox_filter_mask)(int fields)) -{ - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct intel_uncore_extra_reg *er = &box->shared_regs[0]; - int i, alloc = 0; - unsigned long flags; - u64 mask; - - if (reg1->idx == EXTRA_REG_NONE) - return NULL; - - raw_spin_lock_irqsave(&er->lock, flags); - for (i = 0; i < 5; i++) { - if (!(reg1->idx & (0x1 << i))) - continue; - if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i))) - continue; - - mask = cbox_filter_mask(0x1 << i); - if (!__BITS_VALUE(atomic_read(&er->ref), i, 6) || - !((reg1->config ^ er->config) & mask)) { - atomic_add(1 << (i * 6), &er->ref); - er->config &= ~mask; - er->config |= reg1->config & mask; - alloc |= (0x1 << i); - } else { - break; - } - } - raw_spin_unlock_irqrestore(&er->lock, flags); - if (i < 5) - goto fail; - - if (!uncore_box_is_fake(box)) - reg1->alloc |= alloc; - - return NULL; -fail: - for (; i >= 0; i--) { - if (alloc & (0x1 << i)) - atomic_sub(1 << (i * 6), &er->ref); - } - return &constraint_empty; -} - -static u64 snbep_cbox_filter_mask(int fields) -{ - u64 mask = 0; - - if (fields & 0x1) - mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_TID; - if (fields & 0x2) - mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_NID; - if (fields & 0x4) - mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE; - if (fields & 0x8) - mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC; - - return mask; -} - -static struct event_constraint * -snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - return __snbep_cbox_get_constraint(box, event, snbep_cbox_filter_mask); -} - -static int snbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct extra_reg *er; - int idx = 0; - - for (er = snbep_uncore_cbox_extra_regs; er->msr; er++) { - if (er->event != (event->hw.config & er->config_mask)) - continue; - idx |= er->idx; - } - - if (idx) { - reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + - SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; - reg1->config = event->attr.config1 & snbep_cbox_filter_mask(idx); - reg1->idx = idx; - } - return 0; -} - -static struct intel_uncore_ops snbep_uncore_cbox_ops = { - SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), - .hw_config = snbep_cbox_hw_config, - .get_constraint = snbep_cbox_get_constraint, - .put_constraint = snbep_cbox_put_constraint, -}; - -static struct intel_uncore_type snbep_uncore_cbox = { - .name = "cbox", - .num_counters = 4, - .num_boxes = 8, - .perf_ctr_bits = 44, - .event_ctl = SNBEP_C0_MSR_PMON_CTL0, - .perf_ctr = SNBEP_C0_MSR_PMON_CTR0, - .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL, - .msr_offset = SNBEP_CBO_MSR_OFFSET, - .num_shared_regs = 1, - .constraints = snbep_uncore_cbox_constraints, - .ops = &snbep_uncore_cbox_ops, - .format_group = &snbep_uncore_cbox_format_group, -}; - -static u64 snbep_pcu_alter_er(struct perf_event *event, int new_idx, bool modify) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - u64 config = reg1->config; - - if (new_idx > reg1->idx) - config <<= 8 * (new_idx - reg1->idx); - else - config >>= 8 * (reg1->idx - new_idx); - - if (modify) { - hwc->config += new_idx - reg1->idx; - reg1->config = config; - reg1->idx = new_idx; - } - return config; -} - -static struct event_constraint * -snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct intel_uncore_extra_reg *er = &box->shared_regs[0]; - unsigned long flags; - int idx = reg1->idx; - u64 mask, config1 = reg1->config; - bool ok = false; - - if (reg1->idx == EXTRA_REG_NONE || - (!uncore_box_is_fake(box) && reg1->alloc)) - return NULL; -again: - mask = 0xffULL << (idx * 8); - raw_spin_lock_irqsave(&er->lock, flags); - if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) || - !((config1 ^ er->config) & mask)) { - atomic_add(1 << (idx * 8), &er->ref); - er->config &= ~mask; - er->config |= config1 & mask; - ok = true; - } - raw_spin_unlock_irqrestore(&er->lock, flags); - - if (!ok) { - idx = (idx + 1) % 4; - if (idx != reg1->idx) { - config1 = snbep_pcu_alter_er(event, idx, false); - goto again; - } - return &constraint_empty; - } - - if (!uncore_box_is_fake(box)) { - if (idx != reg1->idx) - snbep_pcu_alter_er(event, idx, true); - reg1->alloc = 1; - } - return NULL; -} - -static void snbep_pcu_put_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct intel_uncore_extra_reg *er = &box->shared_regs[0]; - - if (uncore_box_is_fake(box) || !reg1->alloc) - return; - - atomic_sub(1 << (reg1->idx * 8), &er->ref); - reg1->alloc = 0; -} - -static int snbep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK; - - if (ev_sel >= 0xb && ev_sel <= 0xe) { - reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; - reg1->idx = ev_sel - 0xb; - reg1->config = event->attr.config1 & (0xff << reg1->idx); - } - return 0; -} - -static struct intel_uncore_ops snbep_uncore_pcu_ops = { - SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), - .hw_config = snbep_pcu_hw_config, - .get_constraint = snbep_pcu_get_constraint, - .put_constraint = snbep_pcu_put_constraint, -}; - -static struct intel_uncore_type snbep_uncore_pcu = { - .name = "pcu", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0, - .event_ctl = SNBEP_PCU_MSR_PMON_CTL0, - .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, - .num_shared_regs = 1, - .ops = &snbep_uncore_pcu_ops, - .format_group = &snbep_uncore_pcu_format_group, -}; - -static struct intel_uncore_type *snbep_msr_uncores[] = { - &snbep_uncore_ubox, - &snbep_uncore_cbox, - &snbep_uncore_pcu, - NULL, -}; - -enum { - SNBEP_PCI_QPI_PORT0_FILTER, - SNBEP_PCI_QPI_PORT1_FILTER, -}; - -static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - - if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) { - reg1->idx = 0; - reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0; - reg1->config = event->attr.config1; - reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0; - reg2->config = event->attr.config2; - } - return 0; -} - -static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct pci_dev *pdev = box->pci_dev; - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - - if (reg1->idx != EXTRA_REG_NONE) { - int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER; - struct pci_dev *filter_pdev = extra_pci_dev[box->phys_id][idx]; - WARN_ON_ONCE(!filter_pdev); - if (filter_pdev) { - pci_write_config_dword(filter_pdev, reg1->reg, - (u32)reg1->config); - pci_write_config_dword(filter_pdev, reg1->reg + 4, - (u32)(reg1->config >> 32)); - pci_write_config_dword(filter_pdev, reg2->reg, - (u32)reg2->config); - pci_write_config_dword(filter_pdev, reg2->reg + 4, - (u32)(reg2->config >> 32)); - } - } - - pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); -} - -static struct intel_uncore_ops snbep_uncore_qpi_ops = { - SNBEP_UNCORE_PCI_OPS_COMMON_INIT(), - .enable_event = snbep_qpi_enable_event, - .hw_config = snbep_qpi_hw_config, - .get_constraint = uncore_get_constraint, - .put_constraint = uncore_put_constraint, -}; - -#define SNBEP_UNCORE_PCI_COMMON_INIT() \ - .perf_ctr = SNBEP_PCI_PMON_CTR0, \ - .event_ctl = SNBEP_PCI_PMON_CTL0, \ - .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \ - .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \ - .ops = &snbep_uncore_pci_ops, \ - .format_group = &snbep_uncore_format_group - -static struct intel_uncore_type snbep_uncore_ha = { - .name = "ha", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - SNBEP_UNCORE_PCI_COMMON_INIT(), -}; - -static struct intel_uncore_type snbep_uncore_imc = { - .name = "imc", - .num_counters = 4, - .num_boxes = 4, - .perf_ctr_bits = 48, - .fixed_ctr_bits = 48, - .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, - .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, - .event_descs = snbep_uncore_imc_events, - SNBEP_UNCORE_PCI_COMMON_INIT(), -}; - -static struct intel_uncore_type snbep_uncore_qpi = { - .name = "qpi", - .num_counters = 4, - .num_boxes = 2, - .perf_ctr_bits = 48, - .perf_ctr = SNBEP_PCI_PMON_CTR0, - .event_ctl = SNBEP_PCI_PMON_CTL0, - .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_PCI_PMON_BOX_CTL, - .num_shared_regs = 1, - .ops = &snbep_uncore_qpi_ops, - .event_descs = snbep_uncore_qpi_events, - .format_group = &snbep_uncore_qpi_format_group, -}; - - -static struct intel_uncore_type snbep_uncore_r2pcie = { - .name = "r2pcie", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 44, - .constraints = snbep_uncore_r2pcie_constraints, - SNBEP_UNCORE_PCI_COMMON_INIT(), -}; - -static struct intel_uncore_type snbep_uncore_r3qpi = { - .name = "r3qpi", - .num_counters = 3, - .num_boxes = 2, - .perf_ctr_bits = 44, - .constraints = snbep_uncore_r3qpi_constraints, - SNBEP_UNCORE_PCI_COMMON_INIT(), -}; - -enum { - SNBEP_PCI_UNCORE_HA, - SNBEP_PCI_UNCORE_IMC, - SNBEP_PCI_UNCORE_QPI, - SNBEP_PCI_UNCORE_R2PCIE, - SNBEP_PCI_UNCORE_R3QPI, -}; - -static struct intel_uncore_type *snbep_pci_uncores[] = { - [SNBEP_PCI_UNCORE_HA] = &snbep_uncore_ha, - [SNBEP_PCI_UNCORE_IMC] = &snbep_uncore_imc, - [SNBEP_PCI_UNCORE_QPI] = &snbep_uncore_qpi, - [SNBEP_PCI_UNCORE_R2PCIE] = &snbep_uncore_r2pcie, - [SNBEP_PCI_UNCORE_R3QPI] = &snbep_uncore_r3qpi, - NULL, -}; - -static const struct pci_device_id snbep_uncore_pci_ids[] = { - { /* Home Agent */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0), - }, - { /* MC Channel 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0), - }, - { /* MC Channel 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1), - }, - { /* MC Channel 2 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2), - }, - { /* MC Channel 3 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3), - }, - { /* QPI Port 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0), - }, - { /* QPI Port 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1), - }, - { /* R2PCIe */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0), - }, - { /* R3QPI Link 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0), - }, - { /* R3QPI Link 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1), - .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1), - }, - { /* QPI Port 0 filter */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, - SNBEP_PCI_QPI_PORT0_FILTER), - }, - { /* QPI Port 0 filter */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, - SNBEP_PCI_QPI_PORT1_FILTER), - }, - { /* end: all zeroes */ } -}; - -static struct pci_driver snbep_uncore_pci_driver = { - .name = "snbep_uncore", - .id_table = snbep_uncore_pci_ids, -}; - -/* - * build pci bus to socket mapping - */ -static int snbep_pci2phy_map_init(int devid) -{ - struct pci_dev *ubox_dev = NULL; - int i, bus, nodeid; - int err = 0; - u32 config = 0; - - while (1) { - /* find the UBOX device */ - ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, ubox_dev); - if (!ubox_dev) - break; - bus = ubox_dev->bus->number; - /* get the Node ID of the local register */ - err = pci_read_config_dword(ubox_dev, 0x40, &config); - if (err) - break; - nodeid = config; - /* get the Node ID mapping */ - err = pci_read_config_dword(ubox_dev, 0x54, &config); - if (err) - break; - /* - * every three bits in the Node ID mapping register maps - * to a particular node. - */ - for (i = 0; i < 8; i++) { - if (nodeid == ((config >> (3 * i)) & 0x7)) { - pcibus_to_physid[bus] = i; - break; - } - } - } - - if (!err) { - /* - * For PCI bus with no UBOX device, find the next bus - * that has UBOX device and use its mapping. - */ - i = -1; - for (bus = 255; bus >= 0; bus--) { - if (pcibus_to_physid[bus] >= 0) - i = pcibus_to_physid[bus]; - else - pcibus_to_physid[bus] = i; - } - } - - if (ubox_dev) - pci_dev_put(ubox_dev); - - return err ? pcibios_err_to_errno(err) : 0; -} -/* end of Sandy Bridge-EP uncore support */ - -/* IvyTown uncore support */ -static void ivt_uncore_msr_init_box(struct intel_uncore_box *box) -{ - unsigned msr = uncore_msr_box_ctl(box); - if (msr) - wrmsrl(msr, IVT_PMON_BOX_CTL_INT); -} - -static void ivt_uncore_pci_init_box(struct intel_uncore_box *box) -{ - struct pci_dev *pdev = box->pci_dev; - - pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, IVT_PMON_BOX_CTL_INT); -} - -#define IVT_UNCORE_MSR_OPS_COMMON_INIT() \ - .init_box = ivt_uncore_msr_init_box, \ - .disable_box = snbep_uncore_msr_disable_box, \ - .enable_box = snbep_uncore_msr_enable_box, \ - .disable_event = snbep_uncore_msr_disable_event, \ - .enable_event = snbep_uncore_msr_enable_event, \ - .read_counter = uncore_msr_read_counter - -static struct intel_uncore_ops ivt_uncore_msr_ops = { - IVT_UNCORE_MSR_OPS_COMMON_INIT(), -}; - -static struct intel_uncore_ops ivt_uncore_pci_ops = { - .init_box = ivt_uncore_pci_init_box, - .disable_box = snbep_uncore_pci_disable_box, - .enable_box = snbep_uncore_pci_enable_box, - .disable_event = snbep_uncore_pci_disable_event, - .enable_event = snbep_uncore_pci_enable_event, - .read_counter = snbep_uncore_pci_read_counter, -}; - -#define IVT_UNCORE_PCI_COMMON_INIT() \ - .perf_ctr = SNBEP_PCI_PMON_CTR0, \ - .event_ctl = SNBEP_PCI_PMON_CTL0, \ - .event_mask = IVT_PMON_RAW_EVENT_MASK, \ - .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \ - .ops = &ivt_uncore_pci_ops, \ - .format_group = &ivt_uncore_format_group - -static struct attribute *ivt_uncore_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_thresh8.attr, - NULL, -}; - -static struct attribute *ivt_uncore_ubox_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_thresh5.attr, - NULL, -}; - -static struct attribute *ivt_uncore_cbox_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_tid_en.attr, - &format_attr_thresh8.attr, - &format_attr_filter_tid.attr, - &format_attr_filter_link.attr, - &format_attr_filter_state2.attr, - &format_attr_filter_nid2.attr, - &format_attr_filter_opc2.attr, - NULL, -}; - -static struct attribute *ivt_uncore_pcu_formats_attr[] = { - &format_attr_event_ext.attr, - &format_attr_occ_sel.attr, - &format_attr_edge.attr, - &format_attr_thresh5.attr, - &format_attr_occ_invert.attr, - &format_attr_occ_edge.attr, - &format_attr_filter_band0.attr, - &format_attr_filter_band1.attr, - &format_attr_filter_band2.attr, - &format_attr_filter_band3.attr, - NULL, -}; - -static struct attribute *ivt_uncore_qpi_formats_attr[] = { - &format_attr_event_ext.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_thresh8.attr, - &format_attr_match_rds.attr, - &format_attr_match_rnid30.attr, - &format_attr_match_rnid4.attr, - &format_attr_match_dnid.attr, - &format_attr_match_mc.attr, - &format_attr_match_opc.attr, - &format_attr_match_vnw.attr, - &format_attr_match0.attr, - &format_attr_match1.attr, - &format_attr_mask_rds.attr, - &format_attr_mask_rnid30.attr, - &format_attr_mask_rnid4.attr, - &format_attr_mask_dnid.attr, - &format_attr_mask_mc.attr, - &format_attr_mask_opc.attr, - &format_attr_mask_vnw.attr, - &format_attr_mask0.attr, - &format_attr_mask1.attr, - NULL, -}; - -static struct attribute_group ivt_uncore_format_group = { - .name = "format", - .attrs = ivt_uncore_formats_attr, -}; - -static struct attribute_group ivt_uncore_ubox_format_group = { - .name = "format", - .attrs = ivt_uncore_ubox_formats_attr, -}; - -static struct attribute_group ivt_uncore_cbox_format_group = { - .name = "format", - .attrs = ivt_uncore_cbox_formats_attr, -}; - -static struct attribute_group ivt_uncore_pcu_format_group = { - .name = "format", - .attrs = ivt_uncore_pcu_formats_attr, -}; - -static struct attribute_group ivt_uncore_qpi_format_group = { - .name = "format", - .attrs = ivt_uncore_qpi_formats_attr, -}; - -static struct intel_uncore_type ivt_uncore_ubox = { - .name = "ubox", - .num_counters = 2, - .num_boxes = 1, - .perf_ctr_bits = 44, - .fixed_ctr_bits = 48, - .perf_ctr = SNBEP_U_MSR_PMON_CTR0, - .event_ctl = SNBEP_U_MSR_PMON_CTL0, - .event_mask = IVT_U_MSR_PMON_RAW_EVENT_MASK, - .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR, - .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL, - .ops = &ivt_uncore_msr_ops, - .format_group = &ivt_uncore_ubox_format_group, -}; - -static struct extra_reg ivt_uncore_cbox_extra_regs[] = { - SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, - SNBEP_CBO_PMON_CTL_TID_EN, 0x1), - SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2), - - SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc), - SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc), - SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc), - SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc), - SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc), - SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18), - SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18), - SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18), - SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18), - SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8), - EVENT_EXTRA_END -}; - -static u64 ivt_cbox_filter_mask(int fields) -{ - u64 mask = 0; - - if (fields & 0x1) - mask |= IVT_CB0_MSR_PMON_BOX_FILTER_TID; - if (fields & 0x2) - mask |= IVT_CB0_MSR_PMON_BOX_FILTER_LINK; - if (fields & 0x4) - mask |= IVT_CB0_MSR_PMON_BOX_FILTER_STATE; - if (fields & 0x8) - mask |= IVT_CB0_MSR_PMON_BOX_FILTER_NID; - if (fields & 0x10) - mask |= IVT_CB0_MSR_PMON_BOX_FILTER_OPC; - - return mask; -} - -static struct event_constraint * -ivt_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - return __snbep_cbox_get_constraint(box, event, ivt_cbox_filter_mask); -} - -static int ivt_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct extra_reg *er; - int idx = 0; - - for (er = ivt_uncore_cbox_extra_regs; er->msr; er++) { - if (er->event != (event->hw.config & er->config_mask)) - continue; - idx |= er->idx; - } - - if (idx) { - reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + - SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; - reg1->config = event->attr.config1 & ivt_cbox_filter_mask(idx); - reg1->idx = idx; - } - return 0; -} - -static void ivt_cbox_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - - if (reg1->idx != EXTRA_REG_NONE) { - u64 filter = uncore_shared_reg_config(box, 0); - wrmsrl(reg1->reg, filter & 0xffffffff); - wrmsrl(reg1->reg + 6, filter >> 32); - } - - wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); -} - -static struct intel_uncore_ops ivt_uncore_cbox_ops = { - .init_box = ivt_uncore_msr_init_box, - .disable_box = snbep_uncore_msr_disable_box, - .enable_box = snbep_uncore_msr_enable_box, - .disable_event = snbep_uncore_msr_disable_event, - .enable_event = ivt_cbox_enable_event, - .read_counter = uncore_msr_read_counter, - .hw_config = ivt_cbox_hw_config, - .get_constraint = ivt_cbox_get_constraint, - .put_constraint = snbep_cbox_put_constraint, -}; - -static struct intel_uncore_type ivt_uncore_cbox = { - .name = "cbox", - .num_counters = 4, - .num_boxes = 15, - .perf_ctr_bits = 44, - .event_ctl = SNBEP_C0_MSR_PMON_CTL0, - .perf_ctr = SNBEP_C0_MSR_PMON_CTR0, - .event_mask = IVT_CBO_MSR_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL, - .msr_offset = SNBEP_CBO_MSR_OFFSET, - .num_shared_regs = 1, - .constraints = snbep_uncore_cbox_constraints, - .ops = &ivt_uncore_cbox_ops, - .format_group = &ivt_uncore_cbox_format_group, -}; - -static struct intel_uncore_ops ivt_uncore_pcu_ops = { - IVT_UNCORE_MSR_OPS_COMMON_INIT(), - .hw_config = snbep_pcu_hw_config, - .get_constraint = snbep_pcu_get_constraint, - .put_constraint = snbep_pcu_put_constraint, -}; - -static struct intel_uncore_type ivt_uncore_pcu = { - .name = "pcu", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0, - .event_ctl = SNBEP_PCU_MSR_PMON_CTL0, - .event_mask = IVT_PCU_MSR_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, - .num_shared_regs = 1, - .ops = &ivt_uncore_pcu_ops, - .format_group = &ivt_uncore_pcu_format_group, -}; - -static struct intel_uncore_type *ivt_msr_uncores[] = { - &ivt_uncore_ubox, - &ivt_uncore_cbox, - &ivt_uncore_pcu, - NULL, -}; - -static struct intel_uncore_type ivt_uncore_ha = { - .name = "ha", - .num_counters = 4, - .num_boxes = 2, - .perf_ctr_bits = 48, - IVT_UNCORE_PCI_COMMON_INIT(), -}; - -static struct intel_uncore_type ivt_uncore_imc = { - .name = "imc", - .num_counters = 4, - .num_boxes = 8, - .perf_ctr_bits = 48, - .fixed_ctr_bits = 48, - .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, - .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, - IVT_UNCORE_PCI_COMMON_INIT(), -}; - -/* registers in IRP boxes are not properly aligned */ -static unsigned ivt_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4}; -static unsigned ivt_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0}; - -static void ivt_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct pci_dev *pdev = box->pci_dev; - struct hw_perf_event *hwc = &event->hw; - - pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx], - hwc->config | SNBEP_PMON_CTL_EN); -} - -static void ivt_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct pci_dev *pdev = box->pci_dev; - struct hw_perf_event *hwc = &event->hw; - - pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx], hwc->config); -} - -static u64 ivt_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event) -{ - struct pci_dev *pdev = box->pci_dev; - struct hw_perf_event *hwc = &event->hw; - u64 count = 0; - - pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx], (u32 *)&count); - pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1); - - return count; -} - -static struct intel_uncore_ops ivt_uncore_irp_ops = { - .init_box = ivt_uncore_pci_init_box, - .disable_box = snbep_uncore_pci_disable_box, - .enable_box = snbep_uncore_pci_enable_box, - .disable_event = ivt_uncore_irp_disable_event, - .enable_event = ivt_uncore_irp_enable_event, - .read_counter = ivt_uncore_irp_read_counter, -}; - -static struct intel_uncore_type ivt_uncore_irp = { - .name = "irp", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .event_mask = IVT_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_PCI_PMON_BOX_CTL, - .ops = &ivt_uncore_irp_ops, - .format_group = &ivt_uncore_format_group, -}; - -static struct intel_uncore_ops ivt_uncore_qpi_ops = { - .init_box = ivt_uncore_pci_init_box, - .disable_box = snbep_uncore_pci_disable_box, - .enable_box = snbep_uncore_pci_enable_box, - .disable_event = snbep_uncore_pci_disable_event, - .enable_event = snbep_qpi_enable_event, - .read_counter = snbep_uncore_pci_read_counter, - .hw_config = snbep_qpi_hw_config, - .get_constraint = uncore_get_constraint, - .put_constraint = uncore_put_constraint, -}; - -static struct intel_uncore_type ivt_uncore_qpi = { - .name = "qpi", - .num_counters = 4, - .num_boxes = 3, - .perf_ctr_bits = 48, - .perf_ctr = SNBEP_PCI_PMON_CTR0, - .event_ctl = SNBEP_PCI_PMON_CTL0, - .event_mask = IVT_QPI_PCI_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_PCI_PMON_BOX_CTL, - .num_shared_regs = 1, - .ops = &ivt_uncore_qpi_ops, - .format_group = &ivt_uncore_qpi_format_group, -}; - -static struct intel_uncore_type ivt_uncore_r2pcie = { - .name = "r2pcie", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 44, - .constraints = snbep_uncore_r2pcie_constraints, - IVT_UNCORE_PCI_COMMON_INIT(), -}; - -static struct intel_uncore_type ivt_uncore_r3qpi = { - .name = "r3qpi", - .num_counters = 3, - .num_boxes = 2, - .perf_ctr_bits = 44, - .constraints = snbep_uncore_r3qpi_constraints, - IVT_UNCORE_PCI_COMMON_INIT(), -}; - -enum { - IVT_PCI_UNCORE_HA, - IVT_PCI_UNCORE_IMC, - IVT_PCI_UNCORE_IRP, - IVT_PCI_UNCORE_QPI, - IVT_PCI_UNCORE_R2PCIE, - IVT_PCI_UNCORE_R3QPI, -}; - -static struct intel_uncore_type *ivt_pci_uncores[] = { - [IVT_PCI_UNCORE_HA] = &ivt_uncore_ha, - [IVT_PCI_UNCORE_IMC] = &ivt_uncore_imc, - [IVT_PCI_UNCORE_IRP] = &ivt_uncore_irp, - [IVT_PCI_UNCORE_QPI] = &ivt_uncore_qpi, - [IVT_PCI_UNCORE_R2PCIE] = &ivt_uncore_r2pcie, - [IVT_PCI_UNCORE_R3QPI] = &ivt_uncore_r3qpi, - NULL, -}; - -static const struct pci_device_id ivt_uncore_pci_ids[] = { - { /* Home Agent 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 0), - }, - { /* Home Agent 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 1), - }, - { /* MC0 Channel 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 0), - }, - { /* MC0 Channel 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 1), - }, - { /* MC0 Channel 3 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 2), - }, - { /* MC0 Channel 4 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 3), - }, - { /* MC1 Channel 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 4), - }, - { /* MC1 Channel 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 5), - }, - { /* MC1 Channel 3 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 6), - }, - { /* MC1 Channel 4 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 7), - }, - { /* IRP */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IRP, 0), - }, - { /* QPI0 Port 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 0), - }, - { /* QPI0 Port 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 1), - }, - { /* QPI1 Port 2 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 2), - }, - { /* R2PCIe */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R2PCIE, 0), - }, - { /* R3QPI0 Link 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 0), - }, - { /* R3QPI0 Link 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 1), - }, - { /* R3QPI1 Link 2 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e), - .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 2), - }, - { /* QPI Port 0 filter */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, - SNBEP_PCI_QPI_PORT0_FILTER), - }, - { /* QPI Port 0 filter */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, - SNBEP_PCI_QPI_PORT1_FILTER), - }, - { /* end: all zeroes */ } -}; - -static struct pci_driver ivt_uncore_pci_driver = { - .name = "ivt_uncore", - .id_table = ivt_uncore_pci_ids, -}; -/* end of IvyTown uncore support */ - -/* Sandy Bridge uncore support */ -static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - if (hwc->idx < UNCORE_PMC_IDX_FIXED) - wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); - else - wrmsrl(hwc->config_base, SNB_UNC_CTL_EN); -} - -static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - wrmsrl(event->hw.config_base, 0); -} - -static void snb_uncore_msr_init_box(struct intel_uncore_box *box) -{ - if (box->pmu->pmu_idx == 0) { - wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, - SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); - } -} - -static struct uncore_event_desc snb_uncore_events[] = { - INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), - { /* end: all zeroes */ }, -}; - -static struct attribute *snb_uncore_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_cmask5.attr, - NULL, -}; - -static struct attribute_group snb_uncore_format_group = { - .name = "format", - .attrs = snb_uncore_formats_attr, -}; - -static struct intel_uncore_ops snb_uncore_msr_ops = { - .init_box = snb_uncore_msr_init_box, - .disable_event = snb_uncore_msr_disable_event, - .enable_event = snb_uncore_msr_enable_event, - .read_counter = uncore_msr_read_counter, -}; - -static struct event_constraint snb_uncore_cbox_constraints[] = { - UNCORE_EVENT_CONSTRAINT(0x80, 0x1), - UNCORE_EVENT_CONSTRAINT(0x83, 0x1), - EVENT_CONSTRAINT_END -}; - -static struct intel_uncore_type snb_uncore_cbox = { - .name = "cbox", - .num_counters = 2, - .num_boxes = 4, - .perf_ctr_bits = 44, - .fixed_ctr_bits = 48, - .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, - .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, - .fixed_ctr = SNB_UNC_FIXED_CTR, - .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, - .single_fixed = 1, - .event_mask = SNB_UNC_RAW_EVENT_MASK, - .msr_offset = SNB_UNC_CBO_MSR_OFFSET, - .constraints = snb_uncore_cbox_constraints, - .ops = &snb_uncore_msr_ops, - .format_group = &snb_uncore_format_group, - .event_descs = snb_uncore_events, -}; - -static struct intel_uncore_type *snb_msr_uncores[] = { - &snb_uncore_cbox, - NULL, -}; - -enum { - SNB_PCI_UNCORE_IMC, -}; - -static struct uncore_event_desc snb_uncore_imc_events[] = { - INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"), - INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"), - INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"), - - INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"), - INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"), - INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"), - - { /* end: all zeroes */ }, -}; - -#define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff -#define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48 - -/* page size multiple covering all config regs */ -#define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000 - -#define SNB_UNCORE_PCI_IMC_DATA_READS 0x1 -#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050 -#define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2 -#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054 -#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE - -static struct attribute *snb_uncore_imc_formats_attr[] = { - &format_attr_event.attr, - NULL, -}; - -static struct attribute_group snb_uncore_imc_format_group = { - .name = "format", - .attrs = snb_uncore_imc_formats_attr, -}; - -static void snb_uncore_imc_init_box(struct intel_uncore_box *box) -{ - struct pci_dev *pdev = box->pci_dev; - int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET; - resource_size_t addr; - u32 pci_dword; - - pci_read_config_dword(pdev, where, &pci_dword); - addr = pci_dword; - -#ifdef CONFIG_PHYS_ADDR_T_64BIT - pci_read_config_dword(pdev, where + 4, &pci_dword); - addr |= ((resource_size_t)pci_dword << 32); -#endif - - addr &= ~(PAGE_SIZE - 1); - - box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE); - box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL; -} - -static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) -{} - -static void snb_uncore_imc_disable_box(struct intel_uncore_box *box) -{} - -static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{} - -static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event) -{} - -static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - return (u64)*(unsigned int *)(box->io_addr + hwc->event_base); -} - -/* - * custom event_init() function because we define our own fixed, free - * running counters, so we do not want to conflict with generic uncore - * logic. Also simplifies processing - */ -static int snb_uncore_imc_event_init(struct perf_event *event) -{ - struct intel_uncore_pmu *pmu; - struct intel_uncore_box *box; - struct hw_perf_event *hwc = &event->hw; - u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK; - int idx, base; - - if (event->attr.type != event->pmu->type) - return -ENOENT; - - pmu = uncore_event_to_pmu(event); - /* no device found for this pmu */ - if (pmu->func_id < 0) - return -ENOENT; - - /* Sampling not supported yet */ - if (hwc->sample_period) - return -EINVAL; - - /* unsupported modes and filters */ - if (event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest || - event->attr.sample_period) /* no sampling */ - return -EINVAL; - - /* - * Place all uncore events for a particular physical package - * onto a single cpu - */ - if (event->cpu < 0) - return -EINVAL; - - /* check only supported bits are set */ - if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK) - return -EINVAL; - - box = uncore_pmu_to_box(pmu, event->cpu); - if (!box || box->cpu < 0) - return -EINVAL; - - event->cpu = box->cpu; - - event->hw.idx = -1; - event->hw.last_tag = ~0ULL; - event->hw.extra_reg.idx = EXTRA_REG_NONE; - event->hw.branch_reg.idx = EXTRA_REG_NONE; - /* - * check event is known (whitelist, determines counter) - */ - switch (cfg) { - case SNB_UNCORE_PCI_IMC_DATA_READS: - base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE; - idx = UNCORE_PMC_IDX_FIXED; - break; - case SNB_UNCORE_PCI_IMC_DATA_WRITES: - base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE; - idx = UNCORE_PMC_IDX_FIXED + 1; - break; - default: - return -EINVAL; - } - - /* must be done before validate_group */ - event->hw.event_base = base; - event->hw.config = cfg; - event->hw.idx = idx; - - /* no group validation needed, we have free running counters */ - - return 0; -} - -static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - return 0; -} - -static void snb_uncore_imc_event_start(struct perf_event *event, int flags) -{ - struct intel_uncore_box *box = uncore_event_to_box(event); - u64 count; - - if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) - return; - - event->hw.state = 0; - box->n_active++; - - list_add_tail(&event->active_entry, &box->active_list); - - count = snb_uncore_imc_read_counter(box, event); - local64_set(&event->hw.prev_count, count); - - if (box->n_active == 1) - uncore_pmu_start_hrtimer(box); -} - -static void snb_uncore_imc_event_stop(struct perf_event *event, int flags) -{ - struct intel_uncore_box *box = uncore_event_to_box(event); - struct hw_perf_event *hwc = &event->hw; - - if (!(hwc->state & PERF_HES_STOPPED)) { - box->n_active--; - - WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); - hwc->state |= PERF_HES_STOPPED; - - list_del(&event->active_entry); - - if (box->n_active == 0) - uncore_pmu_cancel_hrtimer(box); - } - - if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { - /* - * Drain the remaining delta count out of a event - * that we are disabling: - */ - uncore_perf_event_update(box, event); - hwc->state |= PERF_HES_UPTODATE; - } -} - -static int snb_uncore_imc_event_add(struct perf_event *event, int flags) -{ - struct intel_uncore_box *box = uncore_event_to_box(event); - struct hw_perf_event *hwc = &event->hw; - - if (!box) - return -ENODEV; - - hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; - if (!(flags & PERF_EF_START)) - hwc->state |= PERF_HES_ARCH; - - snb_uncore_imc_event_start(event, 0); - - box->n_events++; - - return 0; -} - -static void snb_uncore_imc_event_del(struct perf_event *event, int flags) -{ - struct intel_uncore_box *box = uncore_event_to_box(event); - int i; - - snb_uncore_imc_event_stop(event, PERF_EF_UPDATE); - - for (i = 0; i < box->n_events; i++) { - if (event == box->event_list[i]) { - --box->n_events; - break; - } - } -} - -static int snb_pci2phy_map_init(int devid) -{ - struct pci_dev *dev = NULL; - int bus; - - dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev); - if (!dev) - return -ENOTTY; - - bus = dev->bus->number; - - pcibus_to_physid[bus] = 0; - - pci_dev_put(dev); - - return 0; -} - -static struct pmu snb_uncore_imc_pmu = { - .task_ctx_nr = perf_invalid_context, - .event_init = snb_uncore_imc_event_init, - .add = snb_uncore_imc_event_add, - .del = snb_uncore_imc_event_del, - .start = snb_uncore_imc_event_start, - .stop = snb_uncore_imc_event_stop, - .read = uncore_pmu_event_read, -}; - -static struct intel_uncore_ops snb_uncore_imc_ops = { - .init_box = snb_uncore_imc_init_box, - .enable_box = snb_uncore_imc_enable_box, - .disable_box = snb_uncore_imc_disable_box, - .disable_event = snb_uncore_imc_disable_event, - .enable_event = snb_uncore_imc_enable_event, - .hw_config = snb_uncore_imc_hw_config, - .read_counter = snb_uncore_imc_read_counter, -}; - -static struct intel_uncore_type snb_uncore_imc = { - .name = "imc", - .num_counters = 2, - .num_boxes = 1, - .fixed_ctr_bits = 32, - .fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE, - .event_descs = snb_uncore_imc_events, - .format_group = &snb_uncore_imc_format_group, - .perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE, - .event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK, - .ops = &snb_uncore_imc_ops, - .pmu = &snb_uncore_imc_pmu, -}; - -static struct intel_uncore_type *snb_pci_uncores[] = { - [SNB_PCI_UNCORE_IMC] = &snb_uncore_imc, - NULL, -}; - -static const struct pci_device_id snb_uncore_pci_ids[] = { - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* end: all zeroes */ }, -}; - -static const struct pci_device_id ivb_uncore_pci_ids[] = { - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* end: all zeroes */ }, -}; - -static const struct pci_device_id hsw_uncore_pci_ids[] = { - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* end: all zeroes */ }, -}; - -static struct pci_driver snb_uncore_pci_driver = { - .name = "snb_uncore", - .id_table = snb_uncore_pci_ids, -}; - -static struct pci_driver ivb_uncore_pci_driver = { - .name = "ivb_uncore", - .id_table = ivb_uncore_pci_ids, -}; - -static struct pci_driver hsw_uncore_pci_driver = { - .name = "hsw_uncore", - .id_table = hsw_uncore_pci_ids, -}; - -/* end of Sandy Bridge uncore support */ - -/* Nehalem uncore support */ -static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box) -{ - wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0); -} - -static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box) -{ - wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); -} - -static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - if (hwc->idx < UNCORE_PMC_IDX_FIXED) - wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); - else - wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN); -} - -static struct attribute *nhm_uncore_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_cmask8.attr, - NULL, -}; - -static struct attribute_group nhm_uncore_format_group = { - .name = "format", - .attrs = nhm_uncore_formats_attr, -}; - -static struct uncore_event_desc nhm_uncore_events[] = { - INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), - INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any, "event=0x2f,umask=0x0f"), - INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any, "event=0x2c,umask=0x0f"), - INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads, "event=0x20,umask=0x01"), - INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes, "event=0x20,umask=0x02"), - INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads, "event=0x20,umask=0x04"), - INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"), - INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads, "event=0x20,umask=0x10"), - INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes, "event=0x20,umask=0x20"), - { /* end: all zeroes */ }, -}; - -static struct intel_uncore_ops nhm_uncore_msr_ops = { - .disable_box = nhm_uncore_msr_disable_box, - .enable_box = nhm_uncore_msr_enable_box, - .disable_event = snb_uncore_msr_disable_event, - .enable_event = nhm_uncore_msr_enable_event, - .read_counter = uncore_msr_read_counter, -}; - -static struct intel_uncore_type nhm_uncore = { - .name = "", - .num_counters = 8, - .num_boxes = 1, - .perf_ctr_bits = 48, - .fixed_ctr_bits = 48, - .event_ctl = NHM_UNC_PERFEVTSEL0, - .perf_ctr = NHM_UNC_UNCORE_PMC0, - .fixed_ctr = NHM_UNC_FIXED_CTR, - .fixed_ctl = NHM_UNC_FIXED_CTR_CTRL, - .event_mask = NHM_UNC_RAW_EVENT_MASK, - .event_descs = nhm_uncore_events, - .ops = &nhm_uncore_msr_ops, - .format_group = &nhm_uncore_format_group, -}; - -static struct intel_uncore_type *nhm_msr_uncores[] = { - &nhm_uncore, - NULL, -}; -/* end of Nehalem uncore support */ - -/* Nehalem-EX uncore support */ -DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5"); -DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7"); -DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63"); -DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63"); - -static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box) -{ - wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL); -} - -static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box) -{ - unsigned msr = uncore_msr_box_ctl(box); - u64 config; - - if (msr) { - rdmsrl(msr, config); - config &= ~((1ULL << uncore_num_counters(box)) - 1); - /* WBox has a fixed counter */ - if (uncore_msr_fixed_ctl(box)) - config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN; - wrmsrl(msr, config); - } -} - -static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box) -{ - unsigned msr = uncore_msr_box_ctl(box); - u64 config; - - if (msr) { - rdmsrl(msr, config); - config |= (1ULL << uncore_num_counters(box)) - 1; - /* WBox has a fixed counter */ - if (uncore_msr_fixed_ctl(box)) - config |= NHMEX_W_PMON_GLOBAL_FIXED_EN; - wrmsrl(msr, config); - } -} - -static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - wrmsrl(event->hw.config_base, 0); -} - -static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - if (hwc->idx >= UNCORE_PMC_IDX_FIXED) - wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0); - else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0) - wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); - else - wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); -} - -#define NHMEX_UNCORE_OPS_COMMON_INIT() \ - .init_box = nhmex_uncore_msr_init_box, \ - .disable_box = nhmex_uncore_msr_disable_box, \ - .enable_box = nhmex_uncore_msr_enable_box, \ - .disable_event = nhmex_uncore_msr_disable_event, \ - .read_counter = uncore_msr_read_counter - -static struct intel_uncore_ops nhmex_uncore_ops = { - NHMEX_UNCORE_OPS_COMMON_INIT(), - .enable_event = nhmex_uncore_msr_enable_event, -}; - -static struct attribute *nhmex_uncore_ubox_formats_attr[] = { - &format_attr_event.attr, - &format_attr_edge.attr, - NULL, -}; - -static struct attribute_group nhmex_uncore_ubox_format_group = { - .name = "format", - .attrs = nhmex_uncore_ubox_formats_attr, -}; - -static struct intel_uncore_type nhmex_uncore_ubox = { - .name = "ubox", - .num_counters = 1, - .num_boxes = 1, - .perf_ctr_bits = 48, - .event_ctl = NHMEX_U_MSR_PMON_EV_SEL, - .perf_ctr = NHMEX_U_MSR_PMON_CTR, - .event_mask = NHMEX_U_PMON_RAW_EVENT_MASK, - .box_ctl = NHMEX_U_MSR_PMON_GLOBAL_CTL, - .ops = &nhmex_uncore_ops, - .format_group = &nhmex_uncore_ubox_format_group -}; - -static struct attribute *nhmex_uncore_cbox_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_thresh8.attr, - NULL, -}; - -static struct attribute_group nhmex_uncore_cbox_format_group = { - .name = "format", - .attrs = nhmex_uncore_cbox_formats_attr, -}; - -/* msr offset for each instance of cbox */ -static unsigned nhmex_cbox_msr_offsets[] = { - 0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0, -}; - -static struct intel_uncore_type nhmex_uncore_cbox = { - .name = "cbox", - .num_counters = 6, - .num_boxes = 10, - .perf_ctr_bits = 48, - .event_ctl = NHMEX_C0_MSR_PMON_EV_SEL0, - .perf_ctr = NHMEX_C0_MSR_PMON_CTR0, - .event_mask = NHMEX_PMON_RAW_EVENT_MASK, - .box_ctl = NHMEX_C0_MSR_PMON_GLOBAL_CTL, - .msr_offsets = nhmex_cbox_msr_offsets, - .pair_ctr_ctl = 1, - .ops = &nhmex_uncore_ops, - .format_group = &nhmex_uncore_cbox_format_group -}; - -static struct uncore_event_desc nhmex_uncore_wbox_events[] = { - INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"), - { /* end: all zeroes */ }, -}; - -static struct intel_uncore_type nhmex_uncore_wbox = { - .name = "wbox", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .event_ctl = NHMEX_W_MSR_PMON_CNT0, - .perf_ctr = NHMEX_W_MSR_PMON_EVT_SEL0, - .fixed_ctr = NHMEX_W_MSR_PMON_FIXED_CTR, - .fixed_ctl = NHMEX_W_MSR_PMON_FIXED_CTL, - .event_mask = NHMEX_PMON_RAW_EVENT_MASK, - .box_ctl = NHMEX_W_MSR_GLOBAL_CTL, - .pair_ctr_ctl = 1, - .event_descs = nhmex_uncore_wbox_events, - .ops = &nhmex_uncore_ops, - .format_group = &nhmex_uncore_cbox_format_group -}; - -static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - int ctr, ev_sel; - - ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >> - NHMEX_B_PMON_CTR_SHIFT; - ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >> - NHMEX_B_PMON_CTL_EV_SEL_SHIFT; - - /* events that do not use the match/mask registers */ - if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) || - (ctr == 2 && ev_sel != 0x4) || ctr == 3) - return 0; - - if (box->pmu->pmu_idx == 0) - reg1->reg = NHMEX_B0_MSR_MATCH; - else - reg1->reg = NHMEX_B1_MSR_MATCH; - reg1->idx = 0; - reg1->config = event->attr.config1; - reg2->config = event->attr.config2; - return 0; -} - -static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - - if (reg1->idx != EXTRA_REG_NONE) { - wrmsrl(reg1->reg, reg1->config); - wrmsrl(reg1->reg + 1, reg2->config); - } - wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | - (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK)); -} - -/* - * The Bbox has 4 counters, but each counter monitors different events. - * Use bits 6-7 in the event config to select counter. - */ -static struct event_constraint nhmex_uncore_bbox_constraints[] = { - EVENT_CONSTRAINT(0 , 1, 0xc0), - EVENT_CONSTRAINT(0x40, 2, 0xc0), - EVENT_CONSTRAINT(0x80, 4, 0xc0), - EVENT_CONSTRAINT(0xc0, 8, 0xc0), - EVENT_CONSTRAINT_END, -}; - -static struct attribute *nhmex_uncore_bbox_formats_attr[] = { - &format_attr_event5.attr, - &format_attr_counter.attr, - &format_attr_match.attr, - &format_attr_mask.attr, - NULL, -}; - -static struct attribute_group nhmex_uncore_bbox_format_group = { - .name = "format", - .attrs = nhmex_uncore_bbox_formats_attr, -}; - -static struct intel_uncore_ops nhmex_uncore_bbox_ops = { - NHMEX_UNCORE_OPS_COMMON_INIT(), - .enable_event = nhmex_bbox_msr_enable_event, - .hw_config = nhmex_bbox_hw_config, - .get_constraint = uncore_get_constraint, - .put_constraint = uncore_put_constraint, -}; - -static struct intel_uncore_type nhmex_uncore_bbox = { - .name = "bbox", - .num_counters = 4, - .num_boxes = 2, - .perf_ctr_bits = 48, - .event_ctl = NHMEX_B0_MSR_PMON_CTL0, - .perf_ctr = NHMEX_B0_MSR_PMON_CTR0, - .event_mask = NHMEX_B_PMON_RAW_EVENT_MASK, - .box_ctl = NHMEX_B0_MSR_PMON_GLOBAL_CTL, - .msr_offset = NHMEX_B_MSR_OFFSET, - .pair_ctr_ctl = 1, - .num_shared_regs = 1, - .constraints = nhmex_uncore_bbox_constraints, - .ops = &nhmex_uncore_bbox_ops, - .format_group = &nhmex_uncore_bbox_format_group -}; - -static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - - /* only TO_R_PROG_EV event uses the match/mask register */ - if ((hwc->config & NHMEX_PMON_CTL_EV_SEL_MASK) != - NHMEX_S_EVENT_TO_R_PROG_EV) - return 0; - - if (box->pmu->pmu_idx == 0) - reg1->reg = NHMEX_S0_MSR_MM_CFG; - else - reg1->reg = NHMEX_S1_MSR_MM_CFG; - reg1->idx = 0; - reg1->config = event->attr.config1; - reg2->config = event->attr.config2; - return 0; -} - -static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - - if (reg1->idx != EXTRA_REG_NONE) { - wrmsrl(reg1->reg, 0); - wrmsrl(reg1->reg + 1, reg1->config); - wrmsrl(reg1->reg + 2, reg2->config); - wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN); - } - wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); -} - -static struct attribute *nhmex_uncore_sbox_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_thresh8.attr, - &format_attr_match.attr, - &format_attr_mask.attr, - NULL, -}; - -static struct attribute_group nhmex_uncore_sbox_format_group = { - .name = "format", - .attrs = nhmex_uncore_sbox_formats_attr, -}; - -static struct intel_uncore_ops nhmex_uncore_sbox_ops = { - NHMEX_UNCORE_OPS_COMMON_INIT(), - .enable_event = nhmex_sbox_msr_enable_event, - .hw_config = nhmex_sbox_hw_config, - .get_constraint = uncore_get_constraint, - .put_constraint = uncore_put_constraint, -}; - -static struct intel_uncore_type nhmex_uncore_sbox = { - .name = "sbox", - .num_counters = 4, - .num_boxes = 2, - .perf_ctr_bits = 48, - .event_ctl = NHMEX_S0_MSR_PMON_CTL0, - .perf_ctr = NHMEX_S0_MSR_PMON_CTR0, - .event_mask = NHMEX_PMON_RAW_EVENT_MASK, - .box_ctl = NHMEX_S0_MSR_PMON_GLOBAL_CTL, - .msr_offset = NHMEX_S_MSR_OFFSET, - .pair_ctr_ctl = 1, - .num_shared_regs = 1, - .ops = &nhmex_uncore_sbox_ops, - .format_group = &nhmex_uncore_sbox_format_group -}; - -enum { - EXTRA_REG_NHMEX_M_FILTER, - EXTRA_REG_NHMEX_M_DSP, - EXTRA_REG_NHMEX_M_ISS, - EXTRA_REG_NHMEX_M_MAP, - EXTRA_REG_NHMEX_M_MSC_THR, - EXTRA_REG_NHMEX_M_PGT, - EXTRA_REG_NHMEX_M_PLD, - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC, -}; - -static struct extra_reg nhmex_uncore_mbox_extra_regs[] = { - MBOX_INC_SEL_EXTAR_REG(0x0, DSP), - MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR), - MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR), - MBOX_INC_SEL_EXTAR_REG(0x9, ISS), - /* event 0xa uses two extra registers */ - MBOX_INC_SEL_EXTAR_REG(0xa, ISS), - MBOX_INC_SEL_EXTAR_REG(0xa, PLD), - MBOX_INC_SEL_EXTAR_REG(0xb, PLD), - /* events 0xd ~ 0x10 use the same extra register */ - MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC), - MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC), - MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC), - MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC), - MBOX_INC_SEL_EXTAR_REG(0x16, PGT), - MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP), - MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS), - MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT), - MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP), - EVENT_EXTRA_END -}; - -/* Nehalem-EX or Westmere-EX ? */ -static bool uncore_nhmex; - -static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config) -{ - struct intel_uncore_extra_reg *er; - unsigned long flags; - bool ret = false; - u64 mask; - - if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { - er = &box->shared_regs[idx]; - raw_spin_lock_irqsave(&er->lock, flags); - if (!atomic_read(&er->ref) || er->config == config) { - atomic_inc(&er->ref); - er->config = config; - ret = true; - } - raw_spin_unlock_irqrestore(&er->lock, flags); - - return ret; - } - /* - * The ZDP_CTL_FVC MSR has 4 fields which are used to control - * events 0xd ~ 0x10. Besides these 4 fields, there are additional - * fields which are shared. - */ - idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; - if (WARN_ON_ONCE(idx >= 4)) - return false; - - /* mask of the shared fields */ - if (uncore_nhmex) - mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK; - else - mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK; - er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; - - raw_spin_lock_irqsave(&er->lock, flags); - /* add mask of the non-shared field if it's in use */ - if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) { - if (uncore_nhmex) - mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); - else - mask |= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); - } - - if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) { - atomic_add(1 << (idx * 8), &er->ref); - if (uncore_nhmex) - mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK | - NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); - else - mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK | - WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); - er->config &= ~mask; - er->config |= (config & mask); - ret = true; - } - raw_spin_unlock_irqrestore(&er->lock, flags); - - return ret; -} - -static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx) -{ - struct intel_uncore_extra_reg *er; - - if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { - er = &box->shared_regs[idx]; - atomic_dec(&er->ref); - return; - } - - idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; - er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; - atomic_sub(1 << (idx * 8), &er->ref); -} - -static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8); - u64 config = reg1->config; - - /* get the non-shared control bits and shift them */ - idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; - if (uncore_nhmex) - config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); - else - config &= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); - if (new_idx > orig_idx) { - idx = new_idx - orig_idx; - config <<= 3 * idx; - } else { - idx = orig_idx - new_idx; - config >>= 3 * idx; - } - - /* add the shared control bits back */ - if (uncore_nhmex) - config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; - else - config |= WSMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; - config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; - if (modify) { - /* adjust the main event selector */ - if (new_idx > orig_idx) - hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT; - else - hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT; - reg1->config = config; - reg1->idx = ~0xff | new_idx; - } - return config; -} - -static struct event_constraint * -nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; - int i, idx[2], alloc = 0; - u64 config1 = reg1->config; - - idx[0] = __BITS_VALUE(reg1->idx, 0, 8); - idx[1] = __BITS_VALUE(reg1->idx, 1, 8); -again: - for (i = 0; i < 2; i++) { - if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i))) - idx[i] = 0xff; - - if (idx[i] == 0xff) - continue; - - if (!nhmex_mbox_get_shared_reg(box, idx[i], - __BITS_VALUE(config1, i, 32))) - goto fail; - alloc |= (0x1 << i); - } - - /* for the match/mask registers */ - if (reg2->idx != EXTRA_REG_NONE && - (uncore_box_is_fake(box) || !reg2->alloc) && - !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config)) - goto fail; - - /* - * If it's a fake box -- as per validate_{group,event}() we - * shouldn't touch event state and we can avoid doing so - * since both will only call get_event_constraints() once - * on each event, this avoids the need for reg->alloc. - */ - if (!uncore_box_is_fake(box)) { - if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) - nhmex_mbox_alter_er(event, idx[0], true); - reg1->alloc |= alloc; - if (reg2->idx != EXTRA_REG_NONE) - reg2->alloc = 1; - } - return NULL; -fail: - if (idx[0] != 0xff && !(alloc & 0x1) && - idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { - /* - * events 0xd ~ 0x10 are functional identical, but are - * controlled by different fields in the ZDP_CTL_FVC - * register. If we failed to take one field, try the - * rest 3 choices. - */ - BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff); - idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; - idx[0] = (idx[0] + 1) % 4; - idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; - if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) { - config1 = nhmex_mbox_alter_er(event, idx[0], false); - goto again; - } - } - - if (alloc & 0x1) - nhmex_mbox_put_shared_reg(box, idx[0]); - if (alloc & 0x2) - nhmex_mbox_put_shared_reg(box, idx[1]); - return &constraint_empty; -} - -static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; - - if (uncore_box_is_fake(box)) - return; - - if (reg1->alloc & 0x1) - nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8)); - if (reg1->alloc & 0x2) - nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8)); - reg1->alloc = 0; - - if (reg2->alloc) { - nhmex_mbox_put_shared_reg(box, reg2->idx); - reg2->alloc = 0; - } -} - -static int nhmex_mbox_extra_reg_idx(struct extra_reg *er) -{ - if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) - return er->idx; - return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd; -} - -static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct intel_uncore_type *type = box->pmu->type; - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; - struct extra_reg *er; - unsigned msr; - int reg_idx = 0; - /* - * The mbox events may require 2 extra MSRs at the most. But only - * the lower 32 bits in these MSRs are significant, so we can use - * config1 to pass two MSRs' config. - */ - for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) { - if (er->event != (event->hw.config & er->config_mask)) - continue; - if (event->attr.config1 & ~er->valid_mask) - return -EINVAL; - - msr = er->msr + type->msr_offset * box->pmu->pmu_idx; - if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff)) - return -EINVAL; - - /* always use the 32~63 bits to pass the PLD config */ - if (er->idx == EXTRA_REG_NHMEX_M_PLD) - reg_idx = 1; - else if (WARN_ON_ONCE(reg_idx > 0)) - return -EINVAL; - - reg1->idx &= ~(0xff << (reg_idx * 8)); - reg1->reg &= ~(0xffff << (reg_idx * 16)); - reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8); - reg1->reg |= msr << (reg_idx * 16); - reg1->config = event->attr.config1; - reg_idx++; - } - /* - * The mbox only provides ability to perform address matching - * for the PLD events. - */ - if (reg_idx == 2) { - reg2->idx = EXTRA_REG_NHMEX_M_FILTER; - if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN) - reg2->config = event->attr.config2; - else - reg2->config = ~0ULL; - if (box->pmu->pmu_idx == 0) - reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG; - else - reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG; - } - return 0; -} - -static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx) -{ - struct intel_uncore_extra_reg *er; - unsigned long flags; - u64 config; - - if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) - return box->shared_regs[idx].config; - - er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; - raw_spin_lock_irqsave(&er->lock, flags); - config = er->config; - raw_spin_unlock_irqrestore(&er->lock, flags); - return config; -} - -static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - int idx; - - idx = __BITS_VALUE(reg1->idx, 0, 8); - if (idx != 0xff) - wrmsrl(__BITS_VALUE(reg1->reg, 0, 16), - nhmex_mbox_shared_reg_config(box, idx)); - idx = __BITS_VALUE(reg1->idx, 1, 8); - if (idx != 0xff) - wrmsrl(__BITS_VALUE(reg1->reg, 1, 16), - nhmex_mbox_shared_reg_config(box, idx)); - - if (reg2->idx != EXTRA_REG_NONE) { - wrmsrl(reg2->reg, 0); - if (reg2->config != ~0ULL) { - wrmsrl(reg2->reg + 1, - reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK); - wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK & - (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT)); - wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN); - } - } - - wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); -} - -DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3"); -DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode, "config:4-5"); -DEFINE_UNCORE_FORMAT_ATTR(wrap_mode, wrap_mode, "config:6"); -DEFINE_UNCORE_FORMAT_ATTR(flag_mode, flag_mode, "config:7"); -DEFINE_UNCORE_FORMAT_ATTR(inc_sel, inc_sel, "config:9-13"); -DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel, set_flag_sel, "config:19-21"); -DEFINE_UNCORE_FORMAT_ATTR(filter_cfg_en, filter_cfg_en, "config2:63"); -DEFINE_UNCORE_FORMAT_ATTR(filter_match, filter_match, "config2:0-33"); -DEFINE_UNCORE_FORMAT_ATTR(filter_mask, filter_mask, "config2:34-61"); -DEFINE_UNCORE_FORMAT_ATTR(dsp, dsp, "config1:0-31"); -DEFINE_UNCORE_FORMAT_ATTR(thr, thr, "config1:0-31"); -DEFINE_UNCORE_FORMAT_ATTR(fvc, fvc, "config1:0-31"); -DEFINE_UNCORE_FORMAT_ATTR(pgt, pgt, "config1:0-31"); -DEFINE_UNCORE_FORMAT_ATTR(map, map, "config1:0-31"); -DEFINE_UNCORE_FORMAT_ATTR(iss, iss, "config1:0-31"); -DEFINE_UNCORE_FORMAT_ATTR(pld, pld, "config1:32-63"); - -static struct attribute *nhmex_uncore_mbox_formats_attr[] = { - &format_attr_count_mode.attr, - &format_attr_storage_mode.attr, - &format_attr_wrap_mode.attr, - &format_attr_flag_mode.attr, - &format_attr_inc_sel.attr, - &format_attr_set_flag_sel.attr, - &format_attr_filter_cfg_en.attr, - &format_attr_filter_match.attr, - &format_attr_filter_mask.attr, - &format_attr_dsp.attr, - &format_attr_thr.attr, - &format_attr_fvc.attr, - &format_attr_pgt.attr, - &format_attr_map.attr, - &format_attr_iss.attr, - &format_attr_pld.attr, - NULL, -}; - -static struct attribute_group nhmex_uncore_mbox_format_group = { - .name = "format", - .attrs = nhmex_uncore_mbox_formats_attr, -}; - -static struct uncore_event_desc nhmex_uncore_mbox_events[] = { - INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"), - INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"), - { /* end: all zeroes */ }, -}; - -static struct uncore_event_desc wsmex_uncore_mbox_events[] = { - INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x5000"), - INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x5040"), - { /* end: all zeroes */ }, -}; - -static struct intel_uncore_ops nhmex_uncore_mbox_ops = { - NHMEX_UNCORE_OPS_COMMON_INIT(), - .enable_event = nhmex_mbox_msr_enable_event, - .hw_config = nhmex_mbox_hw_config, - .get_constraint = nhmex_mbox_get_constraint, - .put_constraint = nhmex_mbox_put_constraint, -}; - -static struct intel_uncore_type nhmex_uncore_mbox = { - .name = "mbox", - .num_counters = 6, - .num_boxes = 2, - .perf_ctr_bits = 48, - .event_ctl = NHMEX_M0_MSR_PMU_CTL0, - .perf_ctr = NHMEX_M0_MSR_PMU_CNT0, - .event_mask = NHMEX_M_PMON_RAW_EVENT_MASK, - .box_ctl = NHMEX_M0_MSR_GLOBAL_CTL, - .msr_offset = NHMEX_M_MSR_OFFSET, - .pair_ctr_ctl = 1, - .num_shared_regs = 8, - .event_descs = nhmex_uncore_mbox_events, - .ops = &nhmex_uncore_mbox_ops, - .format_group = &nhmex_uncore_mbox_format_group, -}; - -static void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - - /* adjust the main event selector and extra register index */ - if (reg1->idx % 2) { - reg1->idx--; - hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT; - } else { - reg1->idx++; - hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT; - } - - /* adjust extra register config */ - switch (reg1->idx % 6) { - case 2: - /* shift the 8~15 bits to the 0~7 bits */ - reg1->config >>= 8; - break; - case 3: - /* shift the 0~7 bits to the 8~15 bits */ - reg1->config <<= 8; - break; - }; -} - -/* - * Each rbox has 4 event set which monitor PQI port 0~3 or 4~7. - * An event set consists of 6 events, the 3rd and 4th events in - * an event set use the same extra register. So an event set uses - * 5 extra registers. - */ -static struct event_constraint * -nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - struct intel_uncore_extra_reg *er; - unsigned long flags; - int idx, er_idx; - u64 config1; - bool ok = false; - - if (!uncore_box_is_fake(box) && reg1->alloc) - return NULL; - - idx = reg1->idx % 6; - config1 = reg1->config; -again: - er_idx = idx; - /* the 3rd and 4th events use the same extra register */ - if (er_idx > 2) - er_idx--; - er_idx += (reg1->idx / 6) * 5; - - er = &box->shared_regs[er_idx]; - raw_spin_lock_irqsave(&er->lock, flags); - if (idx < 2) { - if (!atomic_read(&er->ref) || er->config == reg1->config) { - atomic_inc(&er->ref); - er->config = reg1->config; - ok = true; - } - } else if (idx == 2 || idx == 3) { - /* - * these two events use different fields in a extra register, - * the 0~7 bits and the 8~15 bits respectively. - */ - u64 mask = 0xff << ((idx - 2) * 8); - if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) || - !((er->config ^ config1) & mask)) { - atomic_add(1 << ((idx - 2) * 8), &er->ref); - er->config &= ~mask; - er->config |= config1 & mask; - ok = true; - } - } else { - if (!atomic_read(&er->ref) || - (er->config == (hwc->config >> 32) && - er->config1 == reg1->config && - er->config2 == reg2->config)) { - atomic_inc(&er->ref); - er->config = (hwc->config >> 32); - er->config1 = reg1->config; - er->config2 = reg2->config; - ok = true; - } - } - raw_spin_unlock_irqrestore(&er->lock, flags); - - if (!ok) { - /* - * The Rbox events are always in pairs. The paired - * events are functional identical, but use different - * extra registers. If we failed to take an extra - * register, try the alternative. - */ - idx ^= 1; - if (idx != reg1->idx % 6) { - if (idx == 2) - config1 >>= 8; - else if (idx == 3) - config1 <<= 8; - goto again; - } - } else { - if (!uncore_box_is_fake(box)) { - if (idx != reg1->idx % 6) - nhmex_rbox_alter_er(box, event); - reg1->alloc = 1; - } - return NULL; - } - return &constraint_empty; -} - -static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) -{ - struct intel_uncore_extra_reg *er; - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - int idx, er_idx; - - if (uncore_box_is_fake(box) || !reg1->alloc) - return; - - idx = reg1->idx % 6; - er_idx = idx; - if (er_idx > 2) - er_idx--; - er_idx += (reg1->idx / 6) * 5; - - er = &box->shared_regs[er_idx]; - if (idx == 2 || idx == 3) - atomic_sub(1 << ((idx - 2) * 8), &er->ref); - else - atomic_dec(&er->ref); - - reg1->alloc = 0; -} - -static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; - int idx; - - idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >> - NHMEX_R_PMON_CTL_EV_SEL_SHIFT; - if (idx >= 0x18) - return -EINVAL; - - reg1->idx = idx; - reg1->config = event->attr.config1; - - switch (idx % 6) { - case 4: - case 5: - hwc->config |= event->attr.config & (~0ULL << 32); - reg2->config = event->attr.config2; - break; - }; - return 0; -} - -static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - struct hw_perf_event_extra *reg2 = &hwc->branch_reg; - int idx, port; - - idx = reg1->idx; - port = idx / 6 + box->pmu->pmu_idx * 4; - - switch (idx % 6) { - case 0: - wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config); - break; - case 1: - wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config); - break; - case 2: - case 3: - wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port), - uncore_shared_reg_config(box, 2 + (idx / 6) * 5)); - break; - case 4: - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port), - hwc->config >> 32); - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config); - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config); - break; - case 5: - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port), - hwc->config >> 32); - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config); - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config); - break; - }; - - wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | - (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK)); -} - -DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config:32-63"); -DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config1:0-63"); -DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63"); -DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15"); -DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31"); - -static struct attribute *nhmex_uncore_rbox_formats_attr[] = { - &format_attr_event5.attr, - &format_attr_xbr_mm_cfg.attr, - &format_attr_xbr_match.attr, - &format_attr_xbr_mask.attr, - &format_attr_qlx_cfg.attr, - &format_attr_iperf_cfg.attr, - NULL, -}; - -static struct attribute_group nhmex_uncore_rbox_format_group = { - .name = "format", - .attrs = nhmex_uncore_rbox_formats_attr, -}; - -static struct uncore_event_desc nhmex_uncore_rbox_events[] = { - INTEL_UNCORE_EVENT_DESC(qpi0_flit_send, "event=0x0,iperf_cfg=0x80000000"), - INTEL_UNCORE_EVENT_DESC(qpi1_filt_send, "event=0x6,iperf_cfg=0x80000000"), - INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt, "event=0x0,iperf_cfg=0x40000000"), - INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt, "event=0x6,iperf_cfg=0x40000000"), - INTEL_UNCORE_EVENT_DESC(qpi0_date_response, "event=0x0,iperf_cfg=0xc4"), - INTEL_UNCORE_EVENT_DESC(qpi1_date_response, "event=0x6,iperf_cfg=0xc4"), - { /* end: all zeroes */ }, -}; - -static struct intel_uncore_ops nhmex_uncore_rbox_ops = { - NHMEX_UNCORE_OPS_COMMON_INIT(), - .enable_event = nhmex_rbox_msr_enable_event, - .hw_config = nhmex_rbox_hw_config, - .get_constraint = nhmex_rbox_get_constraint, - .put_constraint = nhmex_rbox_put_constraint, -}; - -static struct intel_uncore_type nhmex_uncore_rbox = { - .name = "rbox", - .num_counters = 8, - .num_boxes = 2, - .perf_ctr_bits = 48, - .event_ctl = NHMEX_R_MSR_PMON_CTL0, - .perf_ctr = NHMEX_R_MSR_PMON_CNT0, - .event_mask = NHMEX_R_PMON_RAW_EVENT_MASK, - .box_ctl = NHMEX_R_MSR_GLOBAL_CTL, - .msr_offset = NHMEX_R_MSR_OFFSET, - .pair_ctr_ctl = 1, - .num_shared_regs = 20, - .event_descs = nhmex_uncore_rbox_events, - .ops = &nhmex_uncore_rbox_ops, - .format_group = &nhmex_uncore_rbox_format_group -}; - -static struct intel_uncore_type *nhmex_msr_uncores[] = { - &nhmex_uncore_ubox, - &nhmex_uncore_cbox, - &nhmex_uncore_bbox, - &nhmex_uncore_sbox, - &nhmex_uncore_mbox, - &nhmex_uncore_rbox, - &nhmex_uncore_wbox, - NULL, -}; -/* end of Nehalem-EX uncore support */ - static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx) { struct hw_perf_event *hwc = &event->hw; @@ -3140,7 +170,7 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_eve hwc->event_base = uncore_perf_ctr(box, hwc->idx); } -static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event) +void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event) { u64 prev_count, new_count, delta; int shift; @@ -3201,14 +231,14 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) return HRTIMER_RESTART; } -static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) +void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) { __hrtimer_start_range_ns(&box->hrtimer, ns_to_ktime(box->hrtimer_duration), 0, HRTIMER_MODE_REL_PINNED, 0); } -static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) +void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) { hrtimer_cancel(&box->hrtimer); } @@ -3291,7 +321,7 @@ uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *eve } if (event->attr.config == UNCORE_FIXED_EVENT) - return &constraint_fixed; + return &uncore_constraint_fixed; if (type->constraints) { for_each_event_constraint(c, type->constraints) { @@ -3496,7 +526,7 @@ static void uncore_pmu_event_del(struct perf_event *event, int flags) event->hw.last_tag = ~0ULL; } -static void uncore_pmu_event_read(struct perf_event *event) +void uncore_pmu_event_read(struct perf_event *event) { struct intel_uncore_box *box = uncore_event_to_box(event); uncore_perf_event_update(box, event); @@ -3635,7 +665,7 @@ static struct attribute_group uncore_pmu_attr_group = { .attrs = uncore_pmu_attrs, }; -static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) +static int uncore_pmu_register(struct intel_uncore_pmu *pmu) { int ret; @@ -3758,9 +788,6 @@ fail: return ret; } -static struct pci_driver *uncore_pci_driver; -static bool pcidrv_registered; - /* * add a pci uncore device */ @@ -3770,18 +797,20 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id struct intel_uncore_box *box; struct intel_uncore_type *type; int phys_id; + bool first_box = false; - phys_id = pcibus_to_physid[pdev->bus->number]; + phys_id = uncore_pcibus_to_physid[pdev->bus->number]; if (phys_id < 0) return -ENODEV; if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { - extra_pci_dev[phys_id][UNCORE_PCI_DEV_IDX(id->driver_data)] = pdev; + int idx = UNCORE_PCI_DEV_IDX(id->driver_data); + uncore_extra_pci_dev[phys_id][idx] = pdev; pci_set_drvdata(pdev, NULL); return 0; } - type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; + type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; box = uncore_alloc_box(type, NUMA_NO_NODE); if (!box) return -ENOMEM; @@ -3803,9 +832,13 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id pci_set_drvdata(pdev, box); raw_spin_lock(&uncore_box_lock); + if (list_empty(&pmu->box_list)) + first_box = true; list_add_tail(&box->list, &pmu->box_list); raw_spin_unlock(&uncore_box_lock); + if (first_box) + uncore_pmu_register(pmu); return 0; } @@ -3813,13 +846,14 @@ static void uncore_pci_remove(struct pci_dev *pdev) { struct intel_uncore_box *box = pci_get_drvdata(pdev); struct intel_uncore_pmu *pmu; - int i, cpu, phys_id = pcibus_to_physid[pdev->bus->number]; + int i, cpu, phys_id = uncore_pcibus_to_physid[pdev->bus->number]; + bool last_box = false; box = pci_get_drvdata(pdev); if (!box) { for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { - if (extra_pci_dev[phys_id][i] == pdev) { - extra_pci_dev[phys_id][i] = NULL; + if (uncore_extra_pci_dev[phys_id][i] == pdev) { + uncore_extra_pci_dev[phys_id][i] = NULL; break; } } @@ -3835,6 +869,8 @@ static void uncore_pci_remove(struct pci_dev *pdev) raw_spin_lock(&uncore_box_lock); list_del(&box->list); + if (list_empty(&pmu->box_list)) + last_box = true; raw_spin_unlock(&uncore_box_lock); for_each_possible_cpu(cpu) { @@ -3846,6 +882,9 @@ static void uncore_pci_remove(struct pci_dev *pdev) WARN_ON_ONCE(atomic_read(&box->refcnt) != 1); kfree(box); + + if (last_box) + perf_pmu_unregister(&pmu->pmu); } static int __init uncore_pci_init(void) @@ -3854,46 +893,32 @@ static int __init uncore_pci_init(void) switch (boot_cpu_data.x86_model) { case 45: /* Sandy Bridge-EP */ - ret = snbep_pci2phy_map_init(0x3ce0); - if (ret) - return ret; - pci_uncores = snbep_pci_uncores; - uncore_pci_driver = &snbep_uncore_pci_driver; + ret = snbep_uncore_pci_init(); break; - case 62: /* IvyTown */ - ret = snbep_pci2phy_map_init(0x0e1e); - if (ret) - return ret; - pci_uncores = ivt_pci_uncores; - uncore_pci_driver = &ivt_uncore_pci_driver; + case 62: /* Ivy Bridge-EP */ + ret = ivbep_uncore_pci_init(); + break; + case 63: /* Haswell-EP */ + ret = hswep_uncore_pci_init(); break; case 42: /* Sandy Bridge */ - ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_SNB_IMC); - if (ret) - return ret; - pci_uncores = snb_pci_uncores; - uncore_pci_driver = &snb_uncore_pci_driver; + ret = snb_uncore_pci_init(); break; case 58: /* Ivy Bridge */ - ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_IVB_IMC); - if (ret) - return ret; - pci_uncores = snb_pci_uncores; - uncore_pci_driver = &ivb_uncore_pci_driver; + ret = ivb_uncore_pci_init(); break; case 60: /* Haswell */ case 69: /* Haswell Celeron */ - ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_HSW_IMC); - if (ret) - return ret; - pci_uncores = snb_pci_uncores; - uncore_pci_driver = &hsw_uncore_pci_driver; + ret = hsw_uncore_pci_init(); break; default: return 0; } - ret = uncore_types_init(pci_uncores); + if (ret) + return ret; + + ret = uncore_types_init(uncore_pci_uncores); if (ret) return ret; @@ -3904,7 +929,7 @@ static int __init uncore_pci_init(void) if (ret == 0) pcidrv_registered = true; else - uncore_types_exit(pci_uncores); + uncore_types_exit(uncore_pci_uncores); return ret; } @@ -3914,7 +939,7 @@ static void __init uncore_pci_exit(void) if (pcidrv_registered) { pcidrv_registered = false; pci_unregister_driver(uncore_pci_driver); - uncore_types_exit(pci_uncores); + uncore_types_exit(uncore_pci_uncores); } } @@ -3940,8 +965,8 @@ static void uncore_cpu_dying(int cpu) struct intel_uncore_box *box; int i, j; - for (i = 0; msr_uncores[i]; i++) { - type = msr_uncores[i]; + for (i = 0; uncore_msr_uncores[i]; i++) { + type = uncore_msr_uncores[i]; for (j = 0; j < type->num_boxes; j++) { pmu = &type->pmus[j]; box = *per_cpu_ptr(pmu->box, cpu); @@ -3961,8 +986,8 @@ static int uncore_cpu_starting(int cpu) phys_id = topology_physical_package_id(cpu); - for (i = 0; msr_uncores[i]; i++) { - type = msr_uncores[i]; + for (i = 0; uncore_msr_uncores[i]; i++) { + type = uncore_msr_uncores[i]; for (j = 0; j < type->num_boxes; j++) { pmu = &type->pmus[j]; box = *per_cpu_ptr(pmu->box, cpu); @@ -4002,8 +1027,8 @@ static int uncore_cpu_prepare(int cpu, int phys_id) struct intel_uncore_box *box; int i, j; - for (i = 0; msr_uncores[i]; i++) { - type = msr_uncores[i]; + for (i = 0; uncore_msr_uncores[i]; i++) { + type = uncore_msr_uncores[i]; for (j = 0; j < type->num_boxes; j++) { pmu = &type->pmus[j]; if (pmu->func_id < 0) @@ -4083,8 +1108,8 @@ static void uncore_event_exit_cpu(int cpu) if (target >= 0) cpumask_set_cpu(target, &uncore_cpu_mask); - uncore_change_context(msr_uncores, cpu, target); - uncore_change_context(pci_uncores, cpu, target); + uncore_change_context(uncore_msr_uncores, cpu, target); + uncore_change_context(uncore_pci_uncores, cpu, target); } static void uncore_event_init_cpu(int cpu) @@ -4099,8 +1124,8 @@ static void uncore_event_init_cpu(int cpu) cpumask_set_cpu(cpu, &uncore_cpu_mask); - uncore_change_context(msr_uncores, -1, cpu); - uncore_change_context(pci_uncores, -1, cpu); + uncore_change_context(uncore_msr_uncores, -1, cpu); + uncore_change_context(uncore_pci_uncores, -1, cpu); } static int uncore_cpu_notifier(struct notifier_block *self, @@ -4160,47 +1185,37 @@ static void __init uncore_cpu_setup(void *dummy) static int __init uncore_cpu_init(void) { - int ret, max_cores; + int ret; - max_cores = boot_cpu_data.x86_max_cores; switch (boot_cpu_data.x86_model) { case 26: /* Nehalem */ case 30: case 37: /* Westmere */ case 44: - msr_uncores = nhm_msr_uncores; + nhm_uncore_cpu_init(); break; case 42: /* Sandy Bridge */ case 58: /* Ivy Bridge */ - if (snb_uncore_cbox.num_boxes > max_cores) - snb_uncore_cbox.num_boxes = max_cores; - msr_uncores = snb_msr_uncores; + snb_uncore_cpu_init(); break; case 45: /* Sandy Bridge-EP */ - if (snbep_uncore_cbox.num_boxes > max_cores) - snbep_uncore_cbox.num_boxes = max_cores; - msr_uncores = snbep_msr_uncores; + snbep_uncore_cpu_init(); break; case 46: /* Nehalem-EX */ - uncore_nhmex = true; case 47: /* Westmere-EX aka. Xeon E7 */ - if (!uncore_nhmex) - nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events; - if (nhmex_uncore_cbox.num_boxes > max_cores) - nhmex_uncore_cbox.num_boxes = max_cores; - msr_uncores = nhmex_msr_uncores; + nhmex_uncore_cpu_init(); break; - case 62: /* IvyTown */ - if (ivt_uncore_cbox.num_boxes > max_cores) - ivt_uncore_cbox.num_boxes = max_cores; - msr_uncores = ivt_msr_uncores; + case 62: /* Ivy Bridge-EP */ + ivbep_uncore_cpu_init(); + break; + case 63: /* Haswell-EP */ + hswep_uncore_cpu_init(); break; - default: return 0; } - ret = uncore_types_init(msr_uncores); + ret = uncore_types_init(uncore_msr_uncores); if (ret) return ret; @@ -4213,16 +1228,8 @@ static int __init uncore_pmus_register(void) struct intel_uncore_type *type; int i, j; - for (i = 0; msr_uncores[i]; i++) { - type = msr_uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - uncore_pmu_register(pmu); - } - } - - for (i = 0; pci_uncores[i]; i++) { - type = pci_uncores[i]; + for (i = 0; uncore_msr_uncores[i]; i++) { + type = uncore_msr_uncores[i]; for (j = 0; j < type->num_boxes; j++) { pmu = &type->pmus[j]; uncore_pmu_register(pmu); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 90236f0c94a9..18eb78bbdd10 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -24,395 +24,6 @@ #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) -/* SNB event control */ -#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff -#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00 -#define SNB_UNC_CTL_EDGE_DET (1 << 18) -#define SNB_UNC_CTL_EN (1 << 22) -#define SNB_UNC_CTL_INVERT (1 << 23) -#define SNB_UNC_CTL_CMASK_MASK 0x1f000000 -#define NHM_UNC_CTL_CMASK_MASK 0xff000000 -#define NHM_UNC_FIXED_CTR_CTL_EN (1 << 0) - -#define SNB_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ - SNB_UNC_CTL_UMASK_MASK | \ - SNB_UNC_CTL_EDGE_DET | \ - SNB_UNC_CTL_INVERT | \ - SNB_UNC_CTL_CMASK_MASK) - -#define NHM_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ - SNB_UNC_CTL_UMASK_MASK | \ - SNB_UNC_CTL_EDGE_DET | \ - SNB_UNC_CTL_INVERT | \ - NHM_UNC_CTL_CMASK_MASK) - -/* SNB global control register */ -#define SNB_UNC_PERF_GLOBAL_CTL 0x391 -#define SNB_UNC_FIXED_CTR_CTRL 0x394 -#define SNB_UNC_FIXED_CTR 0x395 - -/* SNB uncore global control */ -#define SNB_UNC_GLOBAL_CTL_CORE_ALL ((1 << 4) - 1) -#define SNB_UNC_GLOBAL_CTL_EN (1 << 29) - -/* SNB Cbo register */ -#define SNB_UNC_CBO_0_PERFEVTSEL0 0x700 -#define SNB_UNC_CBO_0_PER_CTR0 0x706 -#define SNB_UNC_CBO_MSR_OFFSET 0x10 - -/* NHM global control register */ -#define NHM_UNC_PERF_GLOBAL_CTL 0x391 -#define NHM_UNC_FIXED_CTR 0x394 -#define NHM_UNC_FIXED_CTR_CTRL 0x395 - -/* NHM uncore global control */ -#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 8) - 1) -#define NHM_UNC_GLOBAL_CTL_EN_FC (1ULL << 32) - -/* NHM uncore register */ -#define NHM_UNC_PERFEVTSEL0 0x3c0 -#define NHM_UNC_UNCORE_PMC0 0x3b0 - -/* SNB-EP Box level control */ -#define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0) -#define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1) -#define SNBEP_PMON_BOX_CTL_FRZ (1 << 8) -#define SNBEP_PMON_BOX_CTL_FRZ_EN (1 << 16) -#define SNBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \ - SNBEP_PMON_BOX_CTL_RST_CTRS | \ - SNBEP_PMON_BOX_CTL_FRZ_EN) -/* SNB-EP event control */ -#define SNBEP_PMON_CTL_EV_SEL_MASK 0x000000ff -#define SNBEP_PMON_CTL_UMASK_MASK 0x0000ff00 -#define SNBEP_PMON_CTL_RST (1 << 17) -#define SNBEP_PMON_CTL_EDGE_DET (1 << 18) -#define SNBEP_PMON_CTL_EV_SEL_EXT (1 << 21) -#define SNBEP_PMON_CTL_EN (1 << 22) -#define SNBEP_PMON_CTL_INVERT (1 << 23) -#define SNBEP_PMON_CTL_TRESH_MASK 0xff000000 -#define SNBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ - SNBEP_PMON_CTL_UMASK_MASK | \ - SNBEP_PMON_CTL_EDGE_DET | \ - SNBEP_PMON_CTL_INVERT | \ - SNBEP_PMON_CTL_TRESH_MASK) - -/* SNB-EP Ubox event control */ -#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK 0x1f000000 -#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK \ - (SNBEP_PMON_CTL_EV_SEL_MASK | \ - SNBEP_PMON_CTL_UMASK_MASK | \ - SNBEP_PMON_CTL_EDGE_DET | \ - SNBEP_PMON_CTL_INVERT | \ - SNBEP_U_MSR_PMON_CTL_TRESH_MASK) - -#define SNBEP_CBO_PMON_CTL_TID_EN (1 << 19) -#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ - SNBEP_CBO_PMON_CTL_TID_EN) - -/* SNB-EP PCU event control */ -#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK 0x0000c000 -#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK 0x1f000000 -#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT (1 << 30) -#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET (1 << 31) -#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK \ - (SNBEP_PMON_CTL_EV_SEL_MASK | \ - SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ - SNBEP_PMON_CTL_EDGE_DET | \ - SNBEP_PMON_CTL_EV_SEL_EXT | \ - SNBEP_PMON_CTL_INVERT | \ - SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ - SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ - SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) - -#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK \ - (SNBEP_PMON_RAW_EVENT_MASK | \ - SNBEP_PMON_CTL_EV_SEL_EXT) - -/* SNB-EP pci control register */ -#define SNBEP_PCI_PMON_BOX_CTL 0xf4 -#define SNBEP_PCI_PMON_CTL0 0xd8 -/* SNB-EP pci counter register */ -#define SNBEP_PCI_PMON_CTR0 0xa0 - -/* SNB-EP home agent register */ -#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0 0x40 -#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1 0x44 -#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH 0x48 -/* SNB-EP memory controller register */ -#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL 0xf0 -#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR 0xd0 -/* SNB-EP QPI register */ -#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0 0x228 -#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1 0x22c -#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0 0x238 -#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1 0x23c - -/* SNB-EP Ubox register */ -#define SNBEP_U_MSR_PMON_CTR0 0xc16 -#define SNBEP_U_MSR_PMON_CTL0 0xc10 - -#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL 0xc08 -#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR 0xc09 - -/* SNB-EP Cbo register */ -#define SNBEP_C0_MSR_PMON_CTR0 0xd16 -#define SNBEP_C0_MSR_PMON_CTL0 0xd10 -#define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04 -#define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14 -#define SNBEP_CBO_MSR_OFFSET 0x20 - -#define SNBEP_CB0_MSR_PMON_BOX_FILTER_TID 0x1f -#define SNBEP_CB0_MSR_PMON_BOX_FILTER_NID 0x3fc00 -#define SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE 0x7c0000 -#define SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC 0xff800000 - -#define SNBEP_CBO_EVENT_EXTRA_REG(e, m, i) { \ - .event = (e), \ - .msr = SNBEP_C0_MSR_PMON_BOX_FILTER, \ - .config_mask = (m), \ - .idx = (i) \ -} - -/* SNB-EP PCU register */ -#define SNBEP_PCU_MSR_PMON_CTR0 0xc36 -#define SNBEP_PCU_MSR_PMON_CTL0 0xc30 -#define SNBEP_PCU_MSR_PMON_BOX_CTL 0xc24 -#define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34 -#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK 0xffffffff -#define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc -#define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd - -/* IVT event control */ -#define IVT_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \ - SNBEP_PMON_BOX_CTL_RST_CTRS) -#define IVT_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ - SNBEP_PMON_CTL_UMASK_MASK | \ - SNBEP_PMON_CTL_EDGE_DET | \ - SNBEP_PMON_CTL_TRESH_MASK) -/* IVT Ubox */ -#define IVT_U_MSR_PMON_GLOBAL_CTL 0xc00 -#define IVT_U_PMON_GLOBAL_FRZ_ALL (1 << 31) -#define IVT_U_PMON_GLOBAL_UNFRZ_ALL (1 << 29) - -#define IVT_U_MSR_PMON_RAW_EVENT_MASK \ - (SNBEP_PMON_CTL_EV_SEL_MASK | \ - SNBEP_PMON_CTL_UMASK_MASK | \ - SNBEP_PMON_CTL_EDGE_DET | \ - SNBEP_U_MSR_PMON_CTL_TRESH_MASK) -/* IVT Cbo */ -#define IVT_CBO_MSR_PMON_RAW_EVENT_MASK (IVT_PMON_RAW_EVENT_MASK | \ - SNBEP_CBO_PMON_CTL_TID_EN) - -#define IVT_CB0_MSR_PMON_BOX_FILTER_TID (0x1fULL << 0) -#define IVT_CB0_MSR_PMON_BOX_FILTER_LINK (0xfULL << 5) -#define IVT_CB0_MSR_PMON_BOX_FILTER_STATE (0x3fULL << 17) -#define IVT_CB0_MSR_PMON_BOX_FILTER_NID (0xffffULL << 32) -#define IVT_CB0_MSR_PMON_BOX_FILTER_OPC (0x1ffULL << 52) -#define IVT_CB0_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61) -#define IVT_CB0_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62) -#define IVT_CB0_MSR_PMON_BOX_FILTER_IOSC (0x1ULL << 63) - -/* IVT home agent */ -#define IVT_HA_PCI_PMON_CTL_Q_OCC_RST (1 << 16) -#define IVT_HA_PCI_PMON_RAW_EVENT_MASK \ - (IVT_PMON_RAW_EVENT_MASK | \ - IVT_HA_PCI_PMON_CTL_Q_OCC_RST) -/* IVT PCU */ -#define IVT_PCU_MSR_PMON_RAW_EVENT_MASK \ - (SNBEP_PMON_CTL_EV_SEL_MASK | \ - SNBEP_PMON_CTL_EV_SEL_EXT | \ - SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ - SNBEP_PMON_CTL_EDGE_DET | \ - SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ - SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ - SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) -/* IVT QPI */ -#define IVT_QPI_PCI_PMON_RAW_EVENT_MASK \ - (IVT_PMON_RAW_EVENT_MASK | \ - SNBEP_PMON_CTL_EV_SEL_EXT) - -/* NHM-EX event control */ -#define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff -#define NHMEX_PMON_CTL_UMASK_MASK 0x0000ff00 -#define NHMEX_PMON_CTL_EN_BIT0 (1 << 0) -#define NHMEX_PMON_CTL_EDGE_DET (1 << 18) -#define NHMEX_PMON_CTL_PMI_EN (1 << 20) -#define NHMEX_PMON_CTL_EN_BIT22 (1 << 22) -#define NHMEX_PMON_CTL_INVERT (1 << 23) -#define NHMEX_PMON_CTL_TRESH_MASK 0xff000000 -#define NHMEX_PMON_RAW_EVENT_MASK (NHMEX_PMON_CTL_EV_SEL_MASK | \ - NHMEX_PMON_CTL_UMASK_MASK | \ - NHMEX_PMON_CTL_EDGE_DET | \ - NHMEX_PMON_CTL_INVERT | \ - NHMEX_PMON_CTL_TRESH_MASK) - -/* NHM-EX Ubox */ -#define NHMEX_U_MSR_PMON_GLOBAL_CTL 0xc00 -#define NHMEX_U_MSR_PMON_CTR 0xc11 -#define NHMEX_U_MSR_PMON_EV_SEL 0xc10 - -#define NHMEX_U_PMON_GLOBAL_EN (1 << 0) -#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL 0x0000001e -#define NHMEX_U_PMON_GLOBAL_EN_ALL (1 << 28) -#define NHMEX_U_PMON_GLOBAL_RST_ALL (1 << 29) -#define NHMEX_U_PMON_GLOBAL_FRZ_ALL (1 << 31) - -#define NHMEX_U_PMON_RAW_EVENT_MASK \ - (NHMEX_PMON_CTL_EV_SEL_MASK | \ - NHMEX_PMON_CTL_EDGE_DET) - -/* NHM-EX Cbox */ -#define NHMEX_C0_MSR_PMON_GLOBAL_CTL 0xd00 -#define NHMEX_C0_MSR_PMON_CTR0 0xd11 -#define NHMEX_C0_MSR_PMON_EV_SEL0 0xd10 -#define NHMEX_C_MSR_OFFSET 0x20 - -/* NHM-EX Bbox */ -#define NHMEX_B0_MSR_PMON_GLOBAL_CTL 0xc20 -#define NHMEX_B0_MSR_PMON_CTR0 0xc31 -#define NHMEX_B0_MSR_PMON_CTL0 0xc30 -#define NHMEX_B_MSR_OFFSET 0x40 -#define NHMEX_B0_MSR_MATCH 0xe45 -#define NHMEX_B0_MSR_MASK 0xe46 -#define NHMEX_B1_MSR_MATCH 0xe4d -#define NHMEX_B1_MSR_MASK 0xe4e - -#define NHMEX_B_PMON_CTL_EN (1 << 0) -#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT 1 -#define NHMEX_B_PMON_CTL_EV_SEL_MASK \ - (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT) -#define NHMEX_B_PMON_CTR_SHIFT 6 -#define NHMEX_B_PMON_CTR_MASK \ - (0x3 << NHMEX_B_PMON_CTR_SHIFT) -#define NHMEX_B_PMON_RAW_EVENT_MASK \ - (NHMEX_B_PMON_CTL_EV_SEL_MASK | \ - NHMEX_B_PMON_CTR_MASK) - -/* NHM-EX Sbox */ -#define NHMEX_S0_MSR_PMON_GLOBAL_CTL 0xc40 -#define NHMEX_S0_MSR_PMON_CTR0 0xc51 -#define NHMEX_S0_MSR_PMON_CTL0 0xc50 -#define NHMEX_S_MSR_OFFSET 0x80 -#define NHMEX_S0_MSR_MM_CFG 0xe48 -#define NHMEX_S0_MSR_MATCH 0xe49 -#define NHMEX_S0_MSR_MASK 0xe4a -#define NHMEX_S1_MSR_MM_CFG 0xe58 -#define NHMEX_S1_MSR_MATCH 0xe59 -#define NHMEX_S1_MSR_MASK 0xe5a - -#define NHMEX_S_PMON_MM_CFG_EN (0x1ULL << 63) -#define NHMEX_S_EVENT_TO_R_PROG_EV 0 - -/* NHM-EX Mbox */ -#define NHMEX_M0_MSR_GLOBAL_CTL 0xca0 -#define NHMEX_M0_MSR_PMU_DSP 0xca5 -#define NHMEX_M0_MSR_PMU_ISS 0xca6 -#define NHMEX_M0_MSR_PMU_MAP 0xca7 -#define NHMEX_M0_MSR_PMU_MSC_THR 0xca8 -#define NHMEX_M0_MSR_PMU_PGT 0xca9 -#define NHMEX_M0_MSR_PMU_PLD 0xcaa -#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC 0xcab -#define NHMEX_M0_MSR_PMU_CTL0 0xcb0 -#define NHMEX_M0_MSR_PMU_CNT0 0xcb1 -#define NHMEX_M_MSR_OFFSET 0x40 -#define NHMEX_M0_MSR_PMU_MM_CFG 0xe54 -#define NHMEX_M1_MSR_PMU_MM_CFG 0xe5c - -#define NHMEX_M_PMON_MM_CFG_EN (1ULL << 63) -#define NHMEX_M_PMON_ADDR_MATCH_MASK 0x3ffffffffULL -#define NHMEX_M_PMON_ADDR_MASK_MASK 0x7ffffffULL -#define NHMEX_M_PMON_ADDR_MASK_SHIFT 34 - -#define NHMEX_M_PMON_CTL_EN (1 << 0) -#define NHMEX_M_PMON_CTL_PMI_EN (1 << 1) -#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT 2 -#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK \ - (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT) -#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT 4 -#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK \ - (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT) -#define NHMEX_M_PMON_CTL_WRAP_MODE (1 << 6) -#define NHMEX_M_PMON_CTL_FLAG_MODE (1 << 7) -#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT 9 -#define NHMEX_M_PMON_CTL_INC_SEL_MASK \ - (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT) -#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT 19 -#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK \ - (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) -#define NHMEX_M_PMON_RAW_EVENT_MASK \ - (NHMEX_M_PMON_CTL_COUNT_MODE_MASK | \ - NHMEX_M_PMON_CTL_STORAGE_MODE_MASK | \ - NHMEX_M_PMON_CTL_WRAP_MODE | \ - NHMEX_M_PMON_CTL_FLAG_MODE | \ - NHMEX_M_PMON_CTL_INC_SEL_MASK | \ - NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK) - -#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23)) -#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n))) - -#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24)) -#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n))) - -/* - * use the 9~13 bits to select event If the 7th bit is not set, - * otherwise use the 19~21 bits to select event. - */ -#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT) -#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \ - NHMEX_M_PMON_CTL_FLAG_MODE) -#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \ - NHMEX_M_PMON_CTL_FLAG_MODE) -#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \ - NHMEX_M_PMON_CTL_FLAG_MODE) -#define MBOX_INC_SEL_EXTAR_REG(c, r) \ - EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \ - MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r) -#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \ - EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \ - MBOX_SET_FLAG_SEL_MASK, \ - (u64)-1, NHMEX_M_##r) - -/* NHM-EX Rbox */ -#define NHMEX_R_MSR_GLOBAL_CTL 0xe00 -#define NHMEX_R_MSR_PMON_CTL0 0xe10 -#define NHMEX_R_MSR_PMON_CNT0 0xe11 -#define NHMEX_R_MSR_OFFSET 0x20 - -#define NHMEX_R_MSR_PORTN_QLX_CFG(n) \ - ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4)) -#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n) (0xe04 + (n)) -#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n) (0xe24 + (n)) -#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n) \ - (((n) < 4 ? 0 : 0x10) + (n) * 4) -#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) \ - (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n)) -#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n) \ - (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1) -#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n) \ - (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2) -#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) \ - (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n)) -#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n) \ - (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1) -#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n) \ - (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2) - -#define NHMEX_R_PMON_CTL_EN (1 << 0) -#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT 1 -#define NHMEX_R_PMON_CTL_EV_SEL_MASK \ - (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT) -#define NHMEX_R_PMON_CTL_PMI_EN (1 << 6) -#define NHMEX_R_PMON_RAW_EVENT_MASK NHMEX_R_PMON_CTL_EV_SEL_MASK - -/* NHM-EX Wbox */ -#define NHMEX_W_MSR_GLOBAL_CTL 0xc80 -#define NHMEX_W_MSR_PMON_CNT0 0xc90 -#define NHMEX_W_MSR_PMON_EVT_SEL0 0xc91 -#define NHMEX_W_MSR_PMON_FIXED_CTR 0x394 -#define NHMEX_W_MSR_PMON_FIXED_CTL 0x395 - -#define NHMEX_W_PMON_GLOBAL_FIXED_EN (1ULL << 31) - struct intel_uncore_ops; struct intel_uncore_pmu; struct intel_uncore_box; @@ -505,6 +116,9 @@ struct uncore_event_desc { const char *config; }; +ssize_t uncore_event_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf); + #define INTEL_UNCORE_EVENT_DESC(_name, _config) \ { \ .attr = __ATTR(_name, 0444, uncore_event_show, NULL), \ @@ -522,15 +136,6 @@ static ssize_t __uncore_##_var##_show(struct kobject *kobj, \ static struct kobj_attribute format_attr_##_var = \ __ATTR(_name, 0444, __uncore_##_var##_show, NULL) - -static ssize_t uncore_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct uncore_event_desc *event = - container_of(attr, struct uncore_event_desc, attr); - return sprintf(buf, "%s", event->config); -} - static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box) { return box->pmu->type->box_ctl; @@ -694,3 +299,41 @@ static inline bool uncore_box_is_fake(struct intel_uncore_box *box) { return (box->phys_id < 0); } + +struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event); +struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu); +struct intel_uncore_box *uncore_event_to_box(struct perf_event *event); +u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event); +void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); +void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); +void uncore_pmu_event_read(struct perf_event *event); +void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event); +struct event_constraint * +uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event); +void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event); +u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx); + +extern struct intel_uncore_type **uncore_msr_uncores; +extern struct intel_uncore_type **uncore_pci_uncores; +extern struct pci_driver *uncore_pci_driver; +extern int uncore_pcibus_to_physid[256]; +extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; +extern struct event_constraint uncore_constraint_empty; + +/* perf_event_intel_uncore_snb.c */ +int snb_uncore_pci_init(void); +int ivb_uncore_pci_init(void); +int hsw_uncore_pci_init(void); +void snb_uncore_cpu_init(void); +void nhm_uncore_cpu_init(void); + +/* perf_event_intel_uncore_snbep.c */ +int snbep_uncore_pci_init(void); +void snbep_uncore_cpu_init(void); +int ivbep_uncore_pci_init(void); +void ivbep_uncore_cpu_init(void); +int hswep_uncore_pci_init(void); +void hswep_uncore_cpu_init(void); + +/* perf_event_intel_uncore_nhmex.c */ +void nhmex_uncore_cpu_init(void); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c new file mode 100644 index 000000000000..2749965afed0 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c @@ -0,0 +1,1221 @@ +/* Nehalem-EX/Westmere-EX uncore support */ +#include "perf_event_intel_uncore.h" + +/* NHM-EX event control */ +#define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff +#define NHMEX_PMON_CTL_UMASK_MASK 0x0000ff00 +#define NHMEX_PMON_CTL_EN_BIT0 (1 << 0) +#define NHMEX_PMON_CTL_EDGE_DET (1 << 18) +#define NHMEX_PMON_CTL_PMI_EN (1 << 20) +#define NHMEX_PMON_CTL_EN_BIT22 (1 << 22) +#define NHMEX_PMON_CTL_INVERT (1 << 23) +#define NHMEX_PMON_CTL_TRESH_MASK 0xff000000 +#define NHMEX_PMON_RAW_EVENT_MASK (NHMEX_PMON_CTL_EV_SEL_MASK | \ + NHMEX_PMON_CTL_UMASK_MASK | \ + NHMEX_PMON_CTL_EDGE_DET | \ + NHMEX_PMON_CTL_INVERT | \ + NHMEX_PMON_CTL_TRESH_MASK) + +/* NHM-EX Ubox */ +#define NHMEX_U_MSR_PMON_GLOBAL_CTL 0xc00 +#define NHMEX_U_MSR_PMON_CTR 0xc11 +#define NHMEX_U_MSR_PMON_EV_SEL 0xc10 + +#define NHMEX_U_PMON_GLOBAL_EN (1 << 0) +#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL 0x0000001e +#define NHMEX_U_PMON_GLOBAL_EN_ALL (1 << 28) +#define NHMEX_U_PMON_GLOBAL_RST_ALL (1 << 29) +#define NHMEX_U_PMON_GLOBAL_FRZ_ALL (1 << 31) + +#define NHMEX_U_PMON_RAW_EVENT_MASK \ + (NHMEX_PMON_CTL_EV_SEL_MASK | \ + NHMEX_PMON_CTL_EDGE_DET) + +/* NHM-EX Cbox */ +#define NHMEX_C0_MSR_PMON_GLOBAL_CTL 0xd00 +#define NHMEX_C0_MSR_PMON_CTR0 0xd11 +#define NHMEX_C0_MSR_PMON_EV_SEL0 0xd10 +#define NHMEX_C_MSR_OFFSET 0x20 + +/* NHM-EX Bbox */ +#define NHMEX_B0_MSR_PMON_GLOBAL_CTL 0xc20 +#define NHMEX_B0_MSR_PMON_CTR0 0xc31 +#define NHMEX_B0_MSR_PMON_CTL0 0xc30 +#define NHMEX_B_MSR_OFFSET 0x40 +#define NHMEX_B0_MSR_MATCH 0xe45 +#define NHMEX_B0_MSR_MASK 0xe46 +#define NHMEX_B1_MSR_MATCH 0xe4d +#define NHMEX_B1_MSR_MASK 0xe4e + +#define NHMEX_B_PMON_CTL_EN (1 << 0) +#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT 1 +#define NHMEX_B_PMON_CTL_EV_SEL_MASK \ + (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT) +#define NHMEX_B_PMON_CTR_SHIFT 6 +#define NHMEX_B_PMON_CTR_MASK \ + (0x3 << NHMEX_B_PMON_CTR_SHIFT) +#define NHMEX_B_PMON_RAW_EVENT_MASK \ + (NHMEX_B_PMON_CTL_EV_SEL_MASK | \ + NHMEX_B_PMON_CTR_MASK) + +/* NHM-EX Sbox */ +#define NHMEX_S0_MSR_PMON_GLOBAL_CTL 0xc40 +#define NHMEX_S0_MSR_PMON_CTR0 0xc51 +#define NHMEX_S0_MSR_PMON_CTL0 0xc50 +#define NHMEX_S_MSR_OFFSET 0x80 +#define NHMEX_S0_MSR_MM_CFG 0xe48 +#define NHMEX_S0_MSR_MATCH 0xe49 +#define NHMEX_S0_MSR_MASK 0xe4a +#define NHMEX_S1_MSR_MM_CFG 0xe58 +#define NHMEX_S1_MSR_MATCH 0xe59 +#define NHMEX_S1_MSR_MASK 0xe5a + +#define NHMEX_S_PMON_MM_CFG_EN (0x1ULL << 63) +#define NHMEX_S_EVENT_TO_R_PROG_EV 0 + +/* NHM-EX Mbox */ +#define NHMEX_M0_MSR_GLOBAL_CTL 0xca0 +#define NHMEX_M0_MSR_PMU_DSP 0xca5 +#define NHMEX_M0_MSR_PMU_ISS 0xca6 +#define NHMEX_M0_MSR_PMU_MAP 0xca7 +#define NHMEX_M0_MSR_PMU_MSC_THR 0xca8 +#define NHMEX_M0_MSR_PMU_PGT 0xca9 +#define NHMEX_M0_MSR_PMU_PLD 0xcaa +#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC 0xcab +#define NHMEX_M0_MSR_PMU_CTL0 0xcb0 +#define NHMEX_M0_MSR_PMU_CNT0 0xcb1 +#define NHMEX_M_MSR_OFFSET 0x40 +#define NHMEX_M0_MSR_PMU_MM_CFG 0xe54 +#define NHMEX_M1_MSR_PMU_MM_CFG 0xe5c + +#define NHMEX_M_PMON_MM_CFG_EN (1ULL << 63) +#define NHMEX_M_PMON_ADDR_MATCH_MASK 0x3ffffffffULL +#define NHMEX_M_PMON_ADDR_MASK_MASK 0x7ffffffULL +#define NHMEX_M_PMON_ADDR_MASK_SHIFT 34 + +#define NHMEX_M_PMON_CTL_EN (1 << 0) +#define NHMEX_M_PMON_CTL_PMI_EN (1 << 1) +#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT 2 +#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK \ + (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT) +#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT 4 +#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK \ + (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT) +#define NHMEX_M_PMON_CTL_WRAP_MODE (1 << 6) +#define NHMEX_M_PMON_CTL_FLAG_MODE (1 << 7) +#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT 9 +#define NHMEX_M_PMON_CTL_INC_SEL_MASK \ + (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT) +#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT 19 +#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK \ + (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) +#define NHMEX_M_PMON_RAW_EVENT_MASK \ + (NHMEX_M_PMON_CTL_COUNT_MODE_MASK | \ + NHMEX_M_PMON_CTL_STORAGE_MODE_MASK | \ + NHMEX_M_PMON_CTL_WRAP_MODE | \ + NHMEX_M_PMON_CTL_FLAG_MODE | \ + NHMEX_M_PMON_CTL_INC_SEL_MASK | \ + NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK) + +#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23)) +#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n))) + +#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24)) +#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n))) + +/* + * use the 9~13 bits to select event If the 7th bit is not set, + * otherwise use the 19~21 bits to select event. + */ +#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT) +#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \ + NHMEX_M_PMON_CTL_FLAG_MODE) +#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \ + NHMEX_M_PMON_CTL_FLAG_MODE) +#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \ + NHMEX_M_PMON_CTL_FLAG_MODE) +#define MBOX_INC_SEL_EXTAR_REG(c, r) \ + EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \ + MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r) +#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \ + EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \ + MBOX_SET_FLAG_SEL_MASK, \ + (u64)-1, NHMEX_M_##r) + +/* NHM-EX Rbox */ +#define NHMEX_R_MSR_GLOBAL_CTL 0xe00 +#define NHMEX_R_MSR_PMON_CTL0 0xe10 +#define NHMEX_R_MSR_PMON_CNT0 0xe11 +#define NHMEX_R_MSR_OFFSET 0x20 + +#define NHMEX_R_MSR_PORTN_QLX_CFG(n) \ + ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4)) +#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n) (0xe04 + (n)) +#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n) (0xe24 + (n)) +#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n) \ + (((n) < 4 ? 0 : 0x10) + (n) * 4) +#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) \ + (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n)) +#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1) +#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2) +#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) \ + (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n)) +#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1) +#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2) + +#define NHMEX_R_PMON_CTL_EN (1 << 0) +#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT 1 +#define NHMEX_R_PMON_CTL_EV_SEL_MASK \ + (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT) +#define NHMEX_R_PMON_CTL_PMI_EN (1 << 6) +#define NHMEX_R_PMON_RAW_EVENT_MASK NHMEX_R_PMON_CTL_EV_SEL_MASK + +/* NHM-EX Wbox */ +#define NHMEX_W_MSR_GLOBAL_CTL 0xc80 +#define NHMEX_W_MSR_PMON_CNT0 0xc90 +#define NHMEX_W_MSR_PMON_EVT_SEL0 0xc91 +#define NHMEX_W_MSR_PMON_FIXED_CTR 0x394 +#define NHMEX_W_MSR_PMON_FIXED_CTL 0x395 + +#define NHMEX_W_PMON_GLOBAL_FIXED_EN (1ULL << 31) + +#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \ + ((1ULL << (n)) - 1))) + +DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5"); +DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7"); +DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63"); +DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63"); + +static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box) +{ + wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL); +} + +static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + u64 config; + + if (msr) { + rdmsrl(msr, config); + config &= ~((1ULL << uncore_num_counters(box)) - 1); + /* WBox has a fixed counter */ + if (uncore_msr_fixed_ctl(box)) + config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN; + wrmsrl(msr, config); + } +} + +static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + u64 config; + + if (msr) { + rdmsrl(msr, config); + config |= (1ULL << uncore_num_counters(box)) - 1; + /* WBox has a fixed counter */ + if (uncore_msr_fixed_ctl(box)) + config |= NHMEX_W_PMON_GLOBAL_FIXED_EN; + wrmsrl(msr, config); + } +} + +static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + wrmsrl(event->hw.config_base, 0); +} + +static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->idx >= UNCORE_PMC_IDX_FIXED) + wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0); + else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0) + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); + else + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); +} + +#define NHMEX_UNCORE_OPS_COMMON_INIT() \ + .init_box = nhmex_uncore_msr_init_box, \ + .disable_box = nhmex_uncore_msr_disable_box, \ + .enable_box = nhmex_uncore_msr_enable_box, \ + .disable_event = nhmex_uncore_msr_disable_event, \ + .read_counter = uncore_msr_read_counter + +static struct intel_uncore_ops nhmex_uncore_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_uncore_msr_enable_event, +}; + +static struct attribute *nhmex_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_edge.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_ubox_format_group = { + .name = "format", + .attrs = nhmex_uncore_ubox_formats_attr, +}; + +static struct intel_uncore_type nhmex_uncore_ubox = { + .name = "ubox", + .num_counters = 1, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_U_MSR_PMON_EV_SEL, + .perf_ctr = NHMEX_U_MSR_PMON_CTR, + .event_mask = NHMEX_U_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_U_MSR_PMON_GLOBAL_CTL, + .ops = &nhmex_uncore_ops, + .format_group = &nhmex_uncore_ubox_format_group +}; + +static struct attribute *nhmex_uncore_cbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_cbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_cbox_formats_attr, +}; + +/* msr offset for each instance of cbox */ +static unsigned nhmex_cbox_msr_offsets[] = { + 0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0, +}; + +static struct intel_uncore_type nhmex_uncore_cbox = { + .name = "cbox", + .num_counters = 6, + .num_boxes = 10, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_C0_MSR_PMON_EV_SEL0, + .perf_ctr = NHMEX_C0_MSR_PMON_CTR0, + .event_mask = NHMEX_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_C0_MSR_PMON_GLOBAL_CTL, + .msr_offsets = nhmex_cbox_msr_offsets, + .pair_ctr_ctl = 1, + .ops = &nhmex_uncore_ops, + .format_group = &nhmex_uncore_cbox_format_group +}; + +static struct uncore_event_desc nhmex_uncore_wbox_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type nhmex_uncore_wbox = { + .name = "wbox", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_W_MSR_PMON_CNT0, + .perf_ctr = NHMEX_W_MSR_PMON_EVT_SEL0, + .fixed_ctr = NHMEX_W_MSR_PMON_FIXED_CTR, + .fixed_ctl = NHMEX_W_MSR_PMON_FIXED_CTL, + .event_mask = NHMEX_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_W_MSR_GLOBAL_CTL, + .pair_ctr_ctl = 1, + .event_descs = nhmex_uncore_wbox_events, + .ops = &nhmex_uncore_ops, + .format_group = &nhmex_uncore_cbox_format_group +}; + +static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + int ctr, ev_sel; + + ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >> + NHMEX_B_PMON_CTR_SHIFT; + ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >> + NHMEX_B_PMON_CTL_EV_SEL_SHIFT; + + /* events that do not use the match/mask registers */ + if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) || + (ctr == 2 && ev_sel != 0x4) || ctr == 3) + return 0; + + if (box->pmu->pmu_idx == 0) + reg1->reg = NHMEX_B0_MSR_MATCH; + else + reg1->reg = NHMEX_B1_MSR_MATCH; + reg1->idx = 0; + reg1->config = event->attr.config1; + reg2->config = event->attr.config2; + return 0; +} + +static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + wrmsrl(reg1->reg, reg1->config); + wrmsrl(reg1->reg + 1, reg2->config); + } + wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | + (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK)); +} + +/* + * The Bbox has 4 counters, but each counter monitors different events. + * Use bits 6-7 in the event config to select counter. + */ +static struct event_constraint nhmex_uncore_bbox_constraints[] = { + EVENT_CONSTRAINT(0 , 1, 0xc0), + EVENT_CONSTRAINT(0x40, 2, 0xc0), + EVENT_CONSTRAINT(0x80, 4, 0xc0), + EVENT_CONSTRAINT(0xc0, 8, 0xc0), + EVENT_CONSTRAINT_END, +}; + +static struct attribute *nhmex_uncore_bbox_formats_attr[] = { + &format_attr_event5.attr, + &format_attr_counter.attr, + &format_attr_match.attr, + &format_attr_mask.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_bbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_bbox_formats_attr, +}; + +static struct intel_uncore_ops nhmex_uncore_bbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_bbox_msr_enable_event, + .hw_config = nhmex_bbox_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_bbox = { + .name = "bbox", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_B0_MSR_PMON_CTL0, + .perf_ctr = NHMEX_B0_MSR_PMON_CTR0, + .event_mask = NHMEX_B_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_B0_MSR_PMON_GLOBAL_CTL, + .msr_offset = NHMEX_B_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 1, + .constraints = nhmex_uncore_bbox_constraints, + .ops = &nhmex_uncore_bbox_ops, + .format_group = &nhmex_uncore_bbox_format_group +}; + +static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + /* only TO_R_PROG_EV event uses the match/mask register */ + if ((hwc->config & NHMEX_PMON_CTL_EV_SEL_MASK) != + NHMEX_S_EVENT_TO_R_PROG_EV) + return 0; + + if (box->pmu->pmu_idx == 0) + reg1->reg = NHMEX_S0_MSR_MM_CFG; + else + reg1->reg = NHMEX_S1_MSR_MM_CFG; + reg1->idx = 0; + reg1->config = event->attr.config1; + reg2->config = event->attr.config2; + return 0; +} + +static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + wrmsrl(reg1->reg, 0); + wrmsrl(reg1->reg + 1, reg1->config); + wrmsrl(reg1->reg + 2, reg2->config); + wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN); + } + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); +} + +static struct attribute *nhmex_uncore_sbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_match.attr, + &format_attr_mask.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_sbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_sbox_formats_attr, +}; + +static struct intel_uncore_ops nhmex_uncore_sbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_sbox_msr_enable_event, + .hw_config = nhmex_sbox_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_sbox = { + .name = "sbox", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_S0_MSR_PMON_CTL0, + .perf_ctr = NHMEX_S0_MSR_PMON_CTR0, + .event_mask = NHMEX_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_S0_MSR_PMON_GLOBAL_CTL, + .msr_offset = NHMEX_S_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 1, + .ops = &nhmex_uncore_sbox_ops, + .format_group = &nhmex_uncore_sbox_format_group +}; + +enum { + EXTRA_REG_NHMEX_M_FILTER, + EXTRA_REG_NHMEX_M_DSP, + EXTRA_REG_NHMEX_M_ISS, + EXTRA_REG_NHMEX_M_MAP, + EXTRA_REG_NHMEX_M_MSC_THR, + EXTRA_REG_NHMEX_M_PGT, + EXTRA_REG_NHMEX_M_PLD, + EXTRA_REG_NHMEX_M_ZDP_CTL_FVC, +}; + +static struct extra_reg nhmex_uncore_mbox_extra_regs[] = { + MBOX_INC_SEL_EXTAR_REG(0x0, DSP), + MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR), + MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR), + MBOX_INC_SEL_EXTAR_REG(0x9, ISS), + /* event 0xa uses two extra registers */ + MBOX_INC_SEL_EXTAR_REG(0xa, ISS), + MBOX_INC_SEL_EXTAR_REG(0xa, PLD), + MBOX_INC_SEL_EXTAR_REG(0xb, PLD), + /* events 0xd ~ 0x10 use the same extra register */ + MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0x16, PGT), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP), + EVENT_EXTRA_END +}; + +/* Nehalem-EX or Westmere-EX ? */ +static bool uncore_nhmex; + +static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config) +{ + struct intel_uncore_extra_reg *er; + unsigned long flags; + bool ret = false; + u64 mask; + + if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { + er = &box->shared_regs[idx]; + raw_spin_lock_irqsave(&er->lock, flags); + if (!atomic_read(&er->ref) || er->config == config) { + atomic_inc(&er->ref); + er->config = config; + ret = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + return ret; + } + /* + * The ZDP_CTL_FVC MSR has 4 fields which are used to control + * events 0xd ~ 0x10. Besides these 4 fields, there are additional + * fields which are shared. + */ + idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + if (WARN_ON_ONCE(idx >= 4)) + return false; + + /* mask of the shared fields */ + if (uncore_nhmex) + mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK; + else + mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK; + er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; + + raw_spin_lock_irqsave(&er->lock, flags); + /* add mask of the non-shared field if it's in use */ + if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) { + if (uncore_nhmex) + mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + else + mask |= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + } + + if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) { + atomic_add(1 << (idx * 8), &er->ref); + if (uncore_nhmex) + mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK | + NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + else + mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK | + WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + er->config &= ~mask; + er->config |= (config & mask); + ret = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + return ret; +} + +static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx) +{ + struct intel_uncore_extra_reg *er; + + if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { + er = &box->shared_regs[idx]; + atomic_dec(&er->ref); + return; + } + + idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; + atomic_sub(1 << (idx * 8), &er->ref); +} + +static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8); + u64 config = reg1->config; + + /* get the non-shared control bits and shift them */ + idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + if (uncore_nhmex) + config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + else + config &= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + if (new_idx > orig_idx) { + idx = new_idx - orig_idx; + config <<= 3 * idx; + } else { + idx = orig_idx - new_idx; + config >>= 3 * idx; + } + + /* add the shared control bits back */ + if (uncore_nhmex) + config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; + else + config |= WSMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; + config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; + if (modify) { + /* adjust the main event selector */ + if (new_idx > orig_idx) + hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT; + else + hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT; + reg1->config = config; + reg1->idx = ~0xff | new_idx; + } + return config; +} + +static struct event_constraint * +nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + int i, idx[2], alloc = 0; + u64 config1 = reg1->config; + + idx[0] = __BITS_VALUE(reg1->idx, 0, 8); + idx[1] = __BITS_VALUE(reg1->idx, 1, 8); +again: + for (i = 0; i < 2; i++) { + if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i))) + idx[i] = 0xff; + + if (idx[i] == 0xff) + continue; + + if (!nhmex_mbox_get_shared_reg(box, idx[i], + __BITS_VALUE(config1, i, 32))) + goto fail; + alloc |= (0x1 << i); + } + + /* for the match/mask registers */ + if (reg2->idx != EXTRA_REG_NONE && + (uncore_box_is_fake(box) || !reg2->alloc) && + !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config)) + goto fail; + + /* + * If it's a fake box -- as per validate_{group,event}() we + * shouldn't touch event state and we can avoid doing so + * since both will only call get_event_constraints() once + * on each event, this avoids the need for reg->alloc. + */ + if (!uncore_box_is_fake(box)) { + if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) + nhmex_mbox_alter_er(event, idx[0], true); + reg1->alloc |= alloc; + if (reg2->idx != EXTRA_REG_NONE) + reg2->alloc = 1; + } + return NULL; +fail: + if (idx[0] != 0xff && !(alloc & 0x1) && + idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { + /* + * events 0xd ~ 0x10 are functional identical, but are + * controlled by different fields in the ZDP_CTL_FVC + * register. If we failed to take one field, try the + * rest 3 choices. + */ + BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff); + idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + idx[0] = (idx[0] + 1) % 4; + idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) { + config1 = nhmex_mbox_alter_er(event, idx[0], false); + goto again; + } + } + + if (alloc & 0x1) + nhmex_mbox_put_shared_reg(box, idx[0]); + if (alloc & 0x2) + nhmex_mbox_put_shared_reg(box, idx[1]); + return &uncore_constraint_empty; +} + +static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + + if (uncore_box_is_fake(box)) + return; + + if (reg1->alloc & 0x1) + nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8)); + if (reg1->alloc & 0x2) + nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8)); + reg1->alloc = 0; + + if (reg2->alloc) { + nhmex_mbox_put_shared_reg(box, reg2->idx); + reg2->alloc = 0; + } +} + +static int nhmex_mbox_extra_reg_idx(struct extra_reg *er) +{ + if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) + return er->idx; + return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd; +} + +static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_type *type = box->pmu->type; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + struct extra_reg *er; + unsigned msr; + int reg_idx = 0; + /* + * The mbox events may require 2 extra MSRs at the most. But only + * the lower 32 bits in these MSRs are significant, so we can use + * config1 to pass two MSRs' config. + */ + for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + if (event->attr.config1 & ~er->valid_mask) + return -EINVAL; + + msr = er->msr + type->msr_offset * box->pmu->pmu_idx; + if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff)) + return -EINVAL; + + /* always use the 32~63 bits to pass the PLD config */ + if (er->idx == EXTRA_REG_NHMEX_M_PLD) + reg_idx = 1; + else if (WARN_ON_ONCE(reg_idx > 0)) + return -EINVAL; + + reg1->idx &= ~(0xff << (reg_idx * 8)); + reg1->reg &= ~(0xffff << (reg_idx * 16)); + reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8); + reg1->reg |= msr << (reg_idx * 16); + reg1->config = event->attr.config1; + reg_idx++; + } + /* + * The mbox only provides ability to perform address matching + * for the PLD events. + */ + if (reg_idx == 2) { + reg2->idx = EXTRA_REG_NHMEX_M_FILTER; + if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN) + reg2->config = event->attr.config2; + else + reg2->config = ~0ULL; + if (box->pmu->pmu_idx == 0) + reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG; + else + reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG; + } + return 0; +} + +static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx) +{ + struct intel_uncore_extra_reg *er; + unsigned long flags; + u64 config; + + if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) + return box->shared_regs[idx].config; + + er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; + raw_spin_lock_irqsave(&er->lock, flags); + config = er->config; + raw_spin_unlock_irqrestore(&er->lock, flags); + return config; +} + +static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + int idx; + + idx = __BITS_VALUE(reg1->idx, 0, 8); + if (idx != 0xff) + wrmsrl(__BITS_VALUE(reg1->reg, 0, 16), + nhmex_mbox_shared_reg_config(box, idx)); + idx = __BITS_VALUE(reg1->idx, 1, 8); + if (idx != 0xff) + wrmsrl(__BITS_VALUE(reg1->reg, 1, 16), + nhmex_mbox_shared_reg_config(box, idx)); + + if (reg2->idx != EXTRA_REG_NONE) { + wrmsrl(reg2->reg, 0); + if (reg2->config != ~0ULL) { + wrmsrl(reg2->reg + 1, + reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK); + wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK & + (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT)); + wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN); + } + } + + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); +} + +DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3"); +DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode, "config:4-5"); +DEFINE_UNCORE_FORMAT_ATTR(wrap_mode, wrap_mode, "config:6"); +DEFINE_UNCORE_FORMAT_ATTR(flag_mode, flag_mode, "config:7"); +DEFINE_UNCORE_FORMAT_ATTR(inc_sel, inc_sel, "config:9-13"); +DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel, set_flag_sel, "config:19-21"); +DEFINE_UNCORE_FORMAT_ATTR(filter_cfg_en, filter_cfg_en, "config2:63"); +DEFINE_UNCORE_FORMAT_ATTR(filter_match, filter_match, "config2:0-33"); +DEFINE_UNCORE_FORMAT_ATTR(filter_mask, filter_mask, "config2:34-61"); +DEFINE_UNCORE_FORMAT_ATTR(dsp, dsp, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(thr, thr, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(fvc, fvc, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(pgt, pgt, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(map, map, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(iss, iss, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(pld, pld, "config1:32-63"); + +static struct attribute *nhmex_uncore_mbox_formats_attr[] = { + &format_attr_count_mode.attr, + &format_attr_storage_mode.attr, + &format_attr_wrap_mode.attr, + &format_attr_flag_mode.attr, + &format_attr_inc_sel.attr, + &format_attr_set_flag_sel.attr, + &format_attr_filter_cfg_en.attr, + &format_attr_filter_match.attr, + &format_attr_filter_mask.attr, + &format_attr_dsp.attr, + &format_attr_thr.attr, + &format_attr_fvc.attr, + &format_attr_pgt.attr, + &format_attr_map.attr, + &format_attr_iss.attr, + &format_attr_pld.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_mbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_mbox_formats_attr, +}; + +static struct uncore_event_desc nhmex_uncore_mbox_events[] = { + INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"), + INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"), + { /* end: all zeroes */ }, +}; + +static struct uncore_event_desc wsmex_uncore_mbox_events[] = { + INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x5000"), + INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x5040"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops nhmex_uncore_mbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_mbox_msr_enable_event, + .hw_config = nhmex_mbox_hw_config, + .get_constraint = nhmex_mbox_get_constraint, + .put_constraint = nhmex_mbox_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_mbox = { + .name = "mbox", + .num_counters = 6, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_M0_MSR_PMU_CTL0, + .perf_ctr = NHMEX_M0_MSR_PMU_CNT0, + .event_mask = NHMEX_M_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_M0_MSR_GLOBAL_CTL, + .msr_offset = NHMEX_M_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 8, + .event_descs = nhmex_uncore_mbox_events, + .ops = &nhmex_uncore_mbox_ops, + .format_group = &nhmex_uncore_mbox_format_group, +}; + +static void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + /* adjust the main event selector and extra register index */ + if (reg1->idx % 2) { + reg1->idx--; + hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT; + } else { + reg1->idx++; + hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT; + } + + /* adjust extra register config */ + switch (reg1->idx % 6) { + case 2: + /* shift the 8~15 bits to the 0~7 bits */ + reg1->config >>= 8; + break; + case 3: + /* shift the 0~7 bits to the 8~15 bits */ + reg1->config <<= 8; + break; + } +} + +/* + * Each rbox has 4 event set which monitor PQI port 0~3 or 4~7. + * An event set consists of 6 events, the 3rd and 4th events in + * an event set use the same extra register. So an event set uses + * 5 extra registers. + */ +static struct event_constraint * +nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + struct intel_uncore_extra_reg *er; + unsigned long flags; + int idx, er_idx; + u64 config1; + bool ok = false; + + if (!uncore_box_is_fake(box) && reg1->alloc) + return NULL; + + idx = reg1->idx % 6; + config1 = reg1->config; +again: + er_idx = idx; + /* the 3rd and 4th events use the same extra register */ + if (er_idx > 2) + er_idx--; + er_idx += (reg1->idx / 6) * 5; + + er = &box->shared_regs[er_idx]; + raw_spin_lock_irqsave(&er->lock, flags); + if (idx < 2) { + if (!atomic_read(&er->ref) || er->config == reg1->config) { + atomic_inc(&er->ref); + er->config = reg1->config; + ok = true; + } + } else if (idx == 2 || idx == 3) { + /* + * these two events use different fields in a extra register, + * the 0~7 bits and the 8~15 bits respectively. + */ + u64 mask = 0xff << ((idx - 2) * 8); + if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) || + !((er->config ^ config1) & mask)) { + atomic_add(1 << ((idx - 2) * 8), &er->ref); + er->config &= ~mask; + er->config |= config1 & mask; + ok = true; + } + } else { + if (!atomic_read(&er->ref) || + (er->config == (hwc->config >> 32) && + er->config1 == reg1->config && + er->config2 == reg2->config)) { + atomic_inc(&er->ref); + er->config = (hwc->config >> 32); + er->config1 = reg1->config; + er->config2 = reg2->config; + ok = true; + } + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + if (!ok) { + /* + * The Rbox events are always in pairs. The paired + * events are functional identical, but use different + * extra registers. If we failed to take an extra + * register, try the alternative. + */ + idx ^= 1; + if (idx != reg1->idx % 6) { + if (idx == 2) + config1 >>= 8; + else if (idx == 3) + config1 <<= 8; + goto again; + } + } else { + if (!uncore_box_is_fake(box)) { + if (idx != reg1->idx % 6) + nhmex_rbox_alter_er(box, event); + reg1->alloc = 1; + } + return NULL; + } + return &uncore_constraint_empty; +} + +static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_extra_reg *er; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + int idx, er_idx; + + if (uncore_box_is_fake(box) || !reg1->alloc) + return; + + idx = reg1->idx % 6; + er_idx = idx; + if (er_idx > 2) + er_idx--; + er_idx += (reg1->idx / 6) * 5; + + er = &box->shared_regs[er_idx]; + if (idx == 2 || idx == 3) + atomic_sub(1 << ((idx - 2) * 8), &er->ref); + else + atomic_dec(&er->ref); + + reg1->alloc = 0; +} + +static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + int idx; + + idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >> + NHMEX_R_PMON_CTL_EV_SEL_SHIFT; + if (idx >= 0x18) + return -EINVAL; + + reg1->idx = idx; + reg1->config = event->attr.config1; + + switch (idx % 6) { + case 4: + case 5: + hwc->config |= event->attr.config & (~0ULL << 32); + reg2->config = event->attr.config2; + break; + } + return 0; +} + +static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + int idx, port; + + idx = reg1->idx; + port = idx / 6 + box->pmu->pmu_idx * 4; + + switch (idx % 6) { + case 0: + wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config); + break; + case 1: + wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config); + break; + case 2: + case 3: + wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port), + uncore_shared_reg_config(box, 2 + (idx / 6) * 5)); + break; + case 4: + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port), + hwc->config >> 32); + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config); + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config); + break; + case 5: + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port), + hwc->config >> 32); + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config); + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config); + break; + } + + wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | + (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK)); +} + +DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config:32-63"); +DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config1:0-63"); +DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63"); +DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15"); +DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31"); + +static struct attribute *nhmex_uncore_rbox_formats_attr[] = { + &format_attr_event5.attr, + &format_attr_xbr_mm_cfg.attr, + &format_attr_xbr_match.attr, + &format_attr_xbr_mask.attr, + &format_attr_qlx_cfg.attr, + &format_attr_iperf_cfg.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_rbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_rbox_formats_attr, +}; + +static struct uncore_event_desc nhmex_uncore_rbox_events[] = { + INTEL_UNCORE_EVENT_DESC(qpi0_flit_send, "event=0x0,iperf_cfg=0x80000000"), + INTEL_UNCORE_EVENT_DESC(qpi1_filt_send, "event=0x6,iperf_cfg=0x80000000"), + INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt, "event=0x0,iperf_cfg=0x40000000"), + INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt, "event=0x6,iperf_cfg=0x40000000"), + INTEL_UNCORE_EVENT_DESC(qpi0_date_response, "event=0x0,iperf_cfg=0xc4"), + INTEL_UNCORE_EVENT_DESC(qpi1_date_response, "event=0x6,iperf_cfg=0xc4"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops nhmex_uncore_rbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_rbox_msr_enable_event, + .hw_config = nhmex_rbox_hw_config, + .get_constraint = nhmex_rbox_get_constraint, + .put_constraint = nhmex_rbox_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_rbox = { + .name = "rbox", + .num_counters = 8, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_R_MSR_PMON_CTL0, + .perf_ctr = NHMEX_R_MSR_PMON_CNT0, + .event_mask = NHMEX_R_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_R_MSR_GLOBAL_CTL, + .msr_offset = NHMEX_R_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 20, + .event_descs = nhmex_uncore_rbox_events, + .ops = &nhmex_uncore_rbox_ops, + .format_group = &nhmex_uncore_rbox_format_group +}; + +static struct intel_uncore_type *nhmex_msr_uncores[] = { + &nhmex_uncore_ubox, + &nhmex_uncore_cbox, + &nhmex_uncore_bbox, + &nhmex_uncore_sbox, + &nhmex_uncore_mbox, + &nhmex_uncore_rbox, + &nhmex_uncore_wbox, + NULL, +}; + +void nhmex_uncore_cpu_init(void) +{ + if (boot_cpu_data.x86_model == 46) + uncore_nhmex = true; + else + nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events; + if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + uncore_msr_uncores = nhmex_msr_uncores; +} +/* end of Nehalem-EX uncore support */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c new file mode 100644 index 000000000000..3001015b755c --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -0,0 +1,636 @@ +/* Nehalem/SandBridge/Haswell uncore support */ +#include "perf_event_intel_uncore.h" + +/* SNB event control */ +#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff +#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00 +#define SNB_UNC_CTL_EDGE_DET (1 << 18) +#define SNB_UNC_CTL_EN (1 << 22) +#define SNB_UNC_CTL_INVERT (1 << 23) +#define SNB_UNC_CTL_CMASK_MASK 0x1f000000 +#define NHM_UNC_CTL_CMASK_MASK 0xff000000 +#define NHM_UNC_FIXED_CTR_CTL_EN (1 << 0) + +#define SNB_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + SNB_UNC_CTL_UMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET | \ + SNB_UNC_CTL_INVERT | \ + SNB_UNC_CTL_CMASK_MASK) + +#define NHM_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + SNB_UNC_CTL_UMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET | \ + SNB_UNC_CTL_INVERT | \ + NHM_UNC_CTL_CMASK_MASK) + +/* SNB global control register */ +#define SNB_UNC_PERF_GLOBAL_CTL 0x391 +#define SNB_UNC_FIXED_CTR_CTRL 0x394 +#define SNB_UNC_FIXED_CTR 0x395 + +/* SNB uncore global control */ +#define SNB_UNC_GLOBAL_CTL_CORE_ALL ((1 << 4) - 1) +#define SNB_UNC_GLOBAL_CTL_EN (1 << 29) + +/* SNB Cbo register */ +#define SNB_UNC_CBO_0_PERFEVTSEL0 0x700 +#define SNB_UNC_CBO_0_PER_CTR0 0x706 +#define SNB_UNC_CBO_MSR_OFFSET 0x10 + +/* NHM global control register */ +#define NHM_UNC_PERF_GLOBAL_CTL 0x391 +#define NHM_UNC_FIXED_CTR 0x394 +#define NHM_UNC_FIXED_CTR_CTRL 0x395 + +/* NHM uncore global control */ +#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 8) - 1) +#define NHM_UNC_GLOBAL_CTL_EN_FC (1ULL << 32) + +/* NHM uncore register */ +#define NHM_UNC_PERFEVTSEL0 0x3c0 +#define NHM_UNC_UNCORE_PMC0 0x3b0 + +DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); +DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31"); + +/* Sandy Bridge uncore support */ +static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->idx < UNCORE_PMC_IDX_FIXED) + wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); + else + wrmsrl(hwc->config_base, SNB_UNC_CTL_EN); +} + +static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + wrmsrl(event->hw.config_base, 0); +} + +static void snb_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) { + wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); + } +} + +static struct uncore_event_desc snb_uncore_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), + { /* end: all zeroes */ }, +}; + +static struct attribute *snb_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask5.attr, + NULL, +}; + +static struct attribute_group snb_uncore_format_group = { + .name = "format", + .attrs = snb_uncore_formats_attr, +}; + +static struct intel_uncore_ops snb_uncore_msr_ops = { + .init_box = snb_uncore_msr_init_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct event_constraint snb_uncore_cbox_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x80, 0x1), + UNCORE_EVENT_CONSTRAINT(0x83, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type snb_uncore_cbox = { + .name = "cbox", + .num_counters = 2, + .num_boxes = 4, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, + .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, + .fixed_ctr = SNB_UNC_FIXED_CTR, + .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .constraints = snb_uncore_cbox_constraints, + .ops = &snb_uncore_msr_ops, + .format_group = &snb_uncore_format_group, + .event_descs = snb_uncore_events, +}; + +static struct intel_uncore_type *snb_msr_uncores[] = { + &snb_uncore_cbox, + NULL, +}; + +void snb_uncore_cpu_init(void) +{ + uncore_msr_uncores = snb_msr_uncores; + if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; +} + +enum { + SNB_PCI_UNCORE_IMC, +}; + +static struct uncore_event_desc snb_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"), + INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"), + INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"), + + { /* end: all zeroes */ }, +}; + +#define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff +#define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48 + +/* page size multiple covering all config regs */ +#define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000 + +#define SNB_UNCORE_PCI_IMC_DATA_READS 0x1 +#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050 +#define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2 +#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054 +#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE + +static struct attribute *snb_uncore_imc_formats_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group snb_uncore_imc_format_group = { + .name = "format", + .attrs = snb_uncore_imc_formats_attr, +}; + +static void snb_uncore_imc_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET; + resource_size_t addr; + u32 pci_dword; + + pci_read_config_dword(pdev, where, &pci_dword); + addr = pci_dword; + +#ifdef CONFIG_PHYS_ADDR_T_64BIT + pci_read_config_dword(pdev, where + 4, &pci_dword); + addr |= ((resource_size_t)pci_dword << 32); +#endif + + addr &= ~(PAGE_SIZE - 1); + + box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE); + box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL; +} + +static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) +{} + +static void snb_uncore_imc_disable_box(struct intel_uncore_box *box) +{} + +static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{} + +static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{} + +static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + return (u64)*(unsigned int *)(box->io_addr + hwc->event_base); +} + +/* + * custom event_init() function because we define our own fixed, free + * running counters, so we do not want to conflict with generic uncore + * logic. Also simplifies processing + */ +static int snb_uncore_imc_event_init(struct perf_event *event) +{ + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + struct hw_perf_event *hwc = &event->hw; + u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK; + int idx, base; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + pmu = uncore_event_to_pmu(event); + /* no device found for this pmu */ + if (pmu->func_id < 0) + return -ENOENT; + + /* Sampling not supported yet */ + if (hwc->sample_period) + return -EINVAL; + + /* unsupported modes and filters */ + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest || + event->attr.sample_period) /* no sampling */ + return -EINVAL; + + /* + * Place all uncore events for a particular physical package + * onto a single cpu + */ + if (event->cpu < 0) + return -EINVAL; + + /* check only supported bits are set */ + if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK) + return -EINVAL; + + box = uncore_pmu_to_box(pmu, event->cpu); + if (!box || box->cpu < 0) + return -EINVAL; + + event->cpu = box->cpu; + + event->hw.idx = -1; + event->hw.last_tag = ~0ULL; + event->hw.extra_reg.idx = EXTRA_REG_NONE; + event->hw.branch_reg.idx = EXTRA_REG_NONE; + /* + * check event is known (whitelist, determines counter) + */ + switch (cfg) { + case SNB_UNCORE_PCI_IMC_DATA_READS: + base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE; + idx = UNCORE_PMC_IDX_FIXED; + break; + case SNB_UNCORE_PCI_IMC_DATA_WRITES: + base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE; + idx = UNCORE_PMC_IDX_FIXED + 1; + break; + default: + return -EINVAL; + } + + /* must be done before validate_group */ + event->hw.event_base = base; + event->hw.config = cfg; + event->hw.idx = idx; + + /* no group validation needed, we have free running counters */ + + return 0; +} + +static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + return 0; +} + +static void snb_uncore_imc_event_start(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + u64 count; + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + event->hw.state = 0; + box->n_active++; + + list_add_tail(&event->active_entry, &box->active_list); + + count = snb_uncore_imc_read_counter(box, event); + local64_set(&event->hw.prev_count, count); + + if (box->n_active == 1) + uncore_pmu_start_hrtimer(box); +} + +static void snb_uncore_imc_event_stop(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->state & PERF_HES_STOPPED)) { + box->n_active--; + + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + + list_del(&event->active_entry); + + if (box->n_active == 0) + uncore_pmu_cancel_hrtimer(box); + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + uncore_perf_event_update(box, event); + hwc->state |= PERF_HES_UPTODATE; + } +} + +static int snb_uncore_imc_event_add(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + struct hw_perf_event *hwc = &event->hw; + + if (!box) + return -ENODEV; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; + + snb_uncore_imc_event_start(event, 0); + + box->n_events++; + + return 0; +} + +static void snb_uncore_imc_event_del(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + int i; + + snb_uncore_imc_event_stop(event, PERF_EF_UPDATE); + + for (i = 0; i < box->n_events; i++) { + if (event == box->event_list[i]) { + --box->n_events; + break; + } + } +} + +static int snb_pci2phy_map_init(int devid) +{ + struct pci_dev *dev = NULL; + int bus; + + dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev); + if (!dev) + return -ENOTTY; + + bus = dev->bus->number; + + uncore_pcibus_to_physid[bus] = 0; + + pci_dev_put(dev); + + return 0; +} + +static struct pmu snb_uncore_imc_pmu = { + .task_ctx_nr = perf_invalid_context, + .event_init = snb_uncore_imc_event_init, + .add = snb_uncore_imc_event_add, + .del = snb_uncore_imc_event_del, + .start = snb_uncore_imc_event_start, + .stop = snb_uncore_imc_event_stop, + .read = uncore_pmu_event_read, +}; + +static struct intel_uncore_ops snb_uncore_imc_ops = { + .init_box = snb_uncore_imc_init_box, + .enable_box = snb_uncore_imc_enable_box, + .disable_box = snb_uncore_imc_disable_box, + .disable_event = snb_uncore_imc_disable_event, + .enable_event = snb_uncore_imc_enable_event, + .hw_config = snb_uncore_imc_hw_config, + .read_counter = snb_uncore_imc_read_counter, +}; + +static struct intel_uncore_type snb_uncore_imc = { + .name = "imc", + .num_counters = 2, + .num_boxes = 1, + .fixed_ctr_bits = 32, + .fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE, + .event_descs = snb_uncore_imc_events, + .format_group = &snb_uncore_imc_format_group, + .perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE, + .event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK, + .ops = &snb_uncore_imc_ops, + .pmu = &snb_uncore_imc_pmu, +}; + +static struct intel_uncore_type *snb_pci_uncores[] = { + [SNB_PCI_UNCORE_IMC] = &snb_uncore_imc, + NULL, +}; + +static const struct pci_device_id snb_uncore_pci_ids[] = { + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, +}; + +static const struct pci_device_id ivb_uncore_pci_ids[] = { + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_E3_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, +}; + +static const struct pci_device_id hsw_uncore_pci_ids[] = { + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, +}; + +static struct pci_driver snb_uncore_pci_driver = { + .name = "snb_uncore", + .id_table = snb_uncore_pci_ids, +}; + +static struct pci_driver ivb_uncore_pci_driver = { + .name = "ivb_uncore", + .id_table = ivb_uncore_pci_ids, +}; + +static struct pci_driver hsw_uncore_pci_driver = { + .name = "hsw_uncore", + .id_table = hsw_uncore_pci_ids, +}; + +struct imc_uncore_pci_dev { + __u32 pci_id; + struct pci_driver *driver; +}; +#define IMC_DEV(a, d) \ + { .pci_id = PCI_DEVICE_ID_INTEL_##a, .driver = (d) } + +static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { + IMC_DEV(SNB_IMC, &snb_uncore_pci_driver), + IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver), /* 3rd Gen Core processor */ + IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ + IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */ + { /* end marker */ } +}; + + +#define for_each_imc_pci_id(x, t) \ + for (x = (t); (x)->pci_id; x++) + +static struct pci_driver *imc_uncore_find_dev(void) +{ + const struct imc_uncore_pci_dev *p; + int ret; + + for_each_imc_pci_id(p, desktop_imc_pci_ids) { + ret = snb_pci2phy_map_init(p->pci_id); + if (ret == 0) + return p->driver; + } + return NULL; +} + +static int imc_uncore_pci_init(void) +{ + struct pci_driver *imc_drv = imc_uncore_find_dev(); + + if (!imc_drv) + return -ENODEV; + + uncore_pci_uncores = snb_pci_uncores; + uncore_pci_driver = imc_drv; + + return 0; +} + +int snb_uncore_pci_init(void) +{ + return imc_uncore_pci_init(); +} + +int ivb_uncore_pci_init(void) +{ + return imc_uncore_pci_init(); +} +int hsw_uncore_pci_init(void) +{ + return imc_uncore_pci_init(); +} + +/* end of Sandy Bridge uncore support */ + +/* Nehalem uncore support */ +static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0); +} + +static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); +} + +static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->idx < UNCORE_PMC_IDX_FIXED) + wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); + else + wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN); +} + +static struct attribute *nhm_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask8.attr, + NULL, +}; + +static struct attribute_group nhm_uncore_format_group = { + .name = "format", + .attrs = nhm_uncore_formats_attr, +}; + +static struct uncore_event_desc nhm_uncore_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), + INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any, "event=0x2f,umask=0x0f"), + INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any, "event=0x2c,umask=0x0f"), + INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads, "event=0x20,umask=0x01"), + INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes, "event=0x20,umask=0x02"), + INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads, "event=0x20,umask=0x04"), + INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"), + INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads, "event=0x20,umask=0x10"), + INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes, "event=0x20,umask=0x20"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops nhm_uncore_msr_ops = { + .disable_box = nhm_uncore_msr_disable_box, + .enable_box = nhm_uncore_msr_enable_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = nhm_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct intel_uncore_type nhm_uncore = { + .name = "", + .num_counters = 8, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .event_ctl = NHM_UNC_PERFEVTSEL0, + .perf_ctr = NHM_UNC_UNCORE_PMC0, + .fixed_ctr = NHM_UNC_FIXED_CTR, + .fixed_ctl = NHM_UNC_FIXED_CTR_CTRL, + .event_mask = NHM_UNC_RAW_EVENT_MASK, + .event_descs = nhm_uncore_events, + .ops = &nhm_uncore_msr_ops, + .format_group = &nhm_uncore_format_group, +}; + +static struct intel_uncore_type *nhm_msr_uncores[] = { + &nhm_uncore, + NULL, +}; + +void nhm_uncore_cpu_init(void) +{ + uncore_msr_uncores = nhm_msr_uncores; +} + +/* end of Nehalem uncore support */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c new file mode 100644 index 000000000000..adf138eac85c --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -0,0 +1,2258 @@ +/* SandyBridge-EP/IvyTown uncore support */ +#include "perf_event_intel_uncore.h" + + +/* SNB-EP Box level control */ +#define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0) +#define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1) +#define SNBEP_PMON_BOX_CTL_FRZ (1 << 8) +#define SNBEP_PMON_BOX_CTL_FRZ_EN (1 << 16) +#define SNBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \ + SNBEP_PMON_BOX_CTL_RST_CTRS | \ + SNBEP_PMON_BOX_CTL_FRZ_EN) +/* SNB-EP event control */ +#define SNBEP_PMON_CTL_EV_SEL_MASK 0x000000ff +#define SNBEP_PMON_CTL_UMASK_MASK 0x0000ff00 +#define SNBEP_PMON_CTL_RST (1 << 17) +#define SNBEP_PMON_CTL_EDGE_DET (1 << 18) +#define SNBEP_PMON_CTL_EV_SEL_EXT (1 << 21) +#define SNBEP_PMON_CTL_EN (1 << 22) +#define SNBEP_PMON_CTL_INVERT (1 << 23) +#define SNBEP_PMON_CTL_TRESH_MASK 0xff000000 +#define SNBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_INVERT | \ + SNBEP_PMON_CTL_TRESH_MASK) + +/* SNB-EP Ubox event control */ +#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK 0x1f000000 +#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_INVERT | \ + SNBEP_U_MSR_PMON_CTL_TRESH_MASK) + +#define SNBEP_CBO_PMON_CTL_TID_EN (1 << 19) +#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_CBO_PMON_CTL_TID_EN) + +/* SNB-EP PCU event control */ +#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK 0x0000c000 +#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK 0x1f000000 +#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT (1 << 30) +#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET (1 << 31) +#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_EV_SEL_EXT | \ + SNBEP_PMON_CTL_INVERT | \ + SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) + +#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_PMON_CTL_EV_SEL_EXT) + +/* SNB-EP pci control register */ +#define SNBEP_PCI_PMON_BOX_CTL 0xf4 +#define SNBEP_PCI_PMON_CTL0 0xd8 +/* SNB-EP pci counter register */ +#define SNBEP_PCI_PMON_CTR0 0xa0 + +/* SNB-EP home agent register */ +#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0 0x40 +#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1 0x44 +#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH 0x48 +/* SNB-EP memory controller register */ +#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL 0xf0 +#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR 0xd0 +/* SNB-EP QPI register */ +#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0 0x228 +#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1 0x22c +#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0 0x238 +#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1 0x23c + +/* SNB-EP Ubox register */ +#define SNBEP_U_MSR_PMON_CTR0 0xc16 +#define SNBEP_U_MSR_PMON_CTL0 0xc10 + +#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL 0xc08 +#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR 0xc09 + +/* SNB-EP Cbo register */ +#define SNBEP_C0_MSR_PMON_CTR0 0xd16 +#define SNBEP_C0_MSR_PMON_CTL0 0xd10 +#define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04 +#define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14 +#define SNBEP_CBO_MSR_OFFSET 0x20 + +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_TID 0x1f +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_NID 0x3fc00 +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE 0x7c0000 +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC 0xff800000 + +#define SNBEP_CBO_EVENT_EXTRA_REG(e, m, i) { \ + .event = (e), \ + .msr = SNBEP_C0_MSR_PMON_BOX_FILTER, \ + .config_mask = (m), \ + .idx = (i) \ +} + +/* SNB-EP PCU register */ +#define SNBEP_PCU_MSR_PMON_CTR0 0xc36 +#define SNBEP_PCU_MSR_PMON_CTL0 0xc30 +#define SNBEP_PCU_MSR_PMON_BOX_CTL 0xc24 +#define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34 +#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK 0xffffffff +#define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc +#define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd + +/* IVBEP event control */ +#define IVBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \ + SNBEP_PMON_BOX_CTL_RST_CTRS) +#define IVBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_TRESH_MASK) +/* IVBEP Ubox */ +#define IVBEP_U_MSR_PMON_GLOBAL_CTL 0xc00 +#define IVBEP_U_PMON_GLOBAL_FRZ_ALL (1 << 31) +#define IVBEP_U_PMON_GLOBAL_UNFRZ_ALL (1 << 29) + +#define IVBEP_U_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_U_MSR_PMON_CTL_TRESH_MASK) +/* IVBEP Cbo */ +#define IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK (IVBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_CBO_PMON_CTL_TID_EN) + +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_TID (0x1fULL << 0) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK (0xfULL << 5) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE (0x3fULL << 17) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NID (0xffffULL << 32) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC (0x1ffULL << 52) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62) +#define IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC (0x1ULL << 63) + +/* IVBEP home agent */ +#define IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST (1 << 16) +#define IVBEP_HA_PCI_PMON_RAW_EVENT_MASK \ + (IVBEP_PMON_RAW_EVENT_MASK | \ + IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST) +/* IVBEP PCU */ +#define IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_EV_SEL_EXT | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) +/* IVBEP QPI */ +#define IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK \ + (IVBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_PMON_CTL_EV_SEL_EXT) + +#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \ + ((1ULL << (n)) - 1))) + +/* Haswell-EP Ubox */ +#define HSWEP_U_MSR_PMON_CTR0 0x705 +#define HSWEP_U_MSR_PMON_CTL0 0x709 +#define HSWEP_U_MSR_PMON_FILTER 0x707 + +#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTL 0x703 +#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTR 0x704 + +#define HSWEP_U_MSR_PMON_BOX_FILTER_TID (0x1 << 0) +#define HSWEP_U_MSR_PMON_BOX_FILTER_CID (0x1fULL << 1) +#define HSWEP_U_MSR_PMON_BOX_FILTER_MASK \ + (HSWEP_U_MSR_PMON_BOX_FILTER_TID | \ + HSWEP_U_MSR_PMON_BOX_FILTER_CID) + +/* Haswell-EP CBo */ +#define HSWEP_C0_MSR_PMON_CTR0 0xe08 +#define HSWEP_C0_MSR_PMON_CTL0 0xe01 +#define HSWEP_C0_MSR_PMON_BOX_CTL 0xe00 +#define HSWEP_C0_MSR_PMON_BOX_FILTER0 0xe05 +#define HSWEP_CBO_MSR_OFFSET 0x10 + + +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_TID (0x3fULL << 0) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK (0xfULL << 6) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE (0x7fULL << 17) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NID (0xffffULL << 32) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC (0x1ffULL << 52) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62) +#define HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC (0x1ULL << 63) + + +/* Haswell-EP Sbox */ +#define HSWEP_S0_MSR_PMON_CTR0 0x726 +#define HSWEP_S0_MSR_PMON_CTL0 0x721 +#define HSWEP_S0_MSR_PMON_BOX_CTL 0x720 +#define HSWEP_SBOX_MSR_OFFSET 0xa +#define HSWEP_S_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_CBO_PMON_CTL_TID_EN) + +/* Haswell-EP PCU */ +#define HSWEP_PCU_MSR_PMON_CTR0 0x717 +#define HSWEP_PCU_MSR_PMON_CTL0 0x711 +#define HSWEP_PCU_MSR_PMON_BOX_CTL 0x710 +#define HSWEP_PCU_MSR_PMON_BOX_FILTER 0x715 + + +DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); +DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); +DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28"); +DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15"); +DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); +DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5"); +DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5"); +DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8"); +DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8"); +DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); +DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47"); +DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); +DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22"); +DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23"); +DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31"); +DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60"); +DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62"); +DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61"); +DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63"); +DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23"); +DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51"); +DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35"); +DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31"); +DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17"); +DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12"); +DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8"); +DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4"); +DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63"); +DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51"); +DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35"); +DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31"); +DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17"); +DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12"); +DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8"); +DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4"); +DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63"); + +static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + u32 config = 0; + + if (!pci_read_config_dword(pdev, box_ctl, &config)) { + config |= SNBEP_PMON_BOX_CTL_FRZ; + pci_write_config_dword(pdev, box_ctl, config); + } +} + +static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + u32 config = 0; + + if (!pci_read_config_dword(pdev, box_ctl, &config)) { + config &= ~SNBEP_PMON_BOX_CTL_FRZ; + pci_write_config_dword(pdev, box_ctl, config); + } +} + +static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base, hwc->config); +} + +static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + u64 count = 0; + + pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); + pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); + + return count; +} + +static void snbep_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + + pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, SNBEP_PMON_BOX_CTL_INT); +} + +static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + u64 config; + unsigned msr; + + msr = uncore_msr_box_ctl(box); + if (msr) { + rdmsrl(msr, config); + config |= SNBEP_PMON_BOX_CTL_FRZ; + wrmsrl(msr, config); + } +} + +static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + u64 config; + unsigned msr; + + msr = uncore_msr_box_ctl(box); + if (msr) { + rdmsrl(msr, config); + config &= ~SNBEP_PMON_BOX_CTL_FRZ; + wrmsrl(msr, config); + } +} + +static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) + wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0)); + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, hwc->config); +} + +static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + + if (msr) + wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT); +} + +static struct attribute *snbep_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute *snbep_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + NULL, +}; + +static struct attribute *snbep_uncore_cbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid.attr, + &format_attr_filter_nid.attr, + &format_attr_filter_state.attr, + &format_attr_filter_opc.attr, + NULL, +}; + +static struct attribute *snbep_uncore_pcu_formats_attr[] = { + &format_attr_event_ext.attr, + &format_attr_occ_sel.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + &format_attr_occ_invert.attr, + &format_attr_occ_edge.attr, + &format_attr_filter_band0.attr, + &format_attr_filter_band1.attr, + &format_attr_filter_band2.attr, + &format_attr_filter_band3.attr, + NULL, +}; + +static struct attribute *snbep_uncore_qpi_formats_attr[] = { + &format_attr_event_ext.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_match_rds.attr, + &format_attr_match_rnid30.attr, + &format_attr_match_rnid4.attr, + &format_attr_match_dnid.attr, + &format_attr_match_mc.attr, + &format_attr_match_opc.attr, + &format_attr_match_vnw.attr, + &format_attr_match0.attr, + &format_attr_match1.attr, + &format_attr_mask_rds.attr, + &format_attr_mask_rnid30.attr, + &format_attr_mask_rnid4.attr, + &format_attr_mask_dnid.attr, + &format_attr_mask_mc.attr, + &format_attr_mask_opc.attr, + &format_attr_mask_vnw.attr, + &format_attr_mask0.attr, + &format_attr_mask1.attr, + NULL, +}; + +static struct uncore_event_desc snbep_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), + INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"), + INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"), + { /* end: all zeroes */ }, +}; + +static struct uncore_event_desc snbep_uncore_qpi_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"), + INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"), + INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x102,umask=0x08"), + INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x103,umask=0x04"), + { /* end: all zeroes */ }, +}; + +static struct attribute_group snbep_uncore_format_group = { + .name = "format", + .attrs = snbep_uncore_formats_attr, +}; + +static struct attribute_group snbep_uncore_ubox_format_group = { + .name = "format", + .attrs = snbep_uncore_ubox_formats_attr, +}; + +static struct attribute_group snbep_uncore_cbox_format_group = { + .name = "format", + .attrs = snbep_uncore_cbox_formats_attr, +}; + +static struct attribute_group snbep_uncore_pcu_format_group = { + .name = "format", + .attrs = snbep_uncore_pcu_formats_attr, +}; + +static struct attribute_group snbep_uncore_qpi_format_group = { + .name = "format", + .attrs = snbep_uncore_qpi_formats_attr, +}; + +#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \ + .init_box = snbep_uncore_msr_init_box, \ + .disable_box = snbep_uncore_msr_disable_box, \ + .enable_box = snbep_uncore_msr_enable_box, \ + .disable_event = snbep_uncore_msr_disable_event, \ + .enable_event = snbep_uncore_msr_enable_event, \ + .read_counter = uncore_msr_read_counter + +static struct intel_uncore_ops snbep_uncore_msr_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), +}; + +#define SNBEP_UNCORE_PCI_OPS_COMMON_INIT() \ + .init_box = snbep_uncore_pci_init_box, \ + .disable_box = snbep_uncore_pci_disable_box, \ + .enable_box = snbep_uncore_pci_enable_box, \ + .disable_event = snbep_uncore_pci_disable_event, \ + .read_counter = snbep_uncore_pci_read_counter + +static struct intel_uncore_ops snbep_uncore_pci_ops = { + SNBEP_UNCORE_PCI_OPS_COMMON_INIT(), + .enable_event = snbep_uncore_pci_enable_event, \ +}; + +static struct event_constraint snbep_uncore_cbox_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x01, 0x1), + UNCORE_EVENT_CONSTRAINT(0x02, 0x3), + UNCORE_EVENT_CONSTRAINT(0x04, 0x3), + UNCORE_EVENT_CONSTRAINT(0x05, 0x3), + UNCORE_EVENT_CONSTRAINT(0x07, 0x3), + UNCORE_EVENT_CONSTRAINT(0x09, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x1), + UNCORE_EVENT_CONSTRAINT(0x12, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x3), + UNCORE_EVENT_CONSTRAINT(0x1b, 0xc), + UNCORE_EVENT_CONSTRAINT(0x1c, 0xc), + UNCORE_EVENT_CONSTRAINT(0x1d, 0xc), + UNCORE_EVENT_CONSTRAINT(0x1e, 0xc), + EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff), + UNCORE_EVENT_CONSTRAINT(0x21, 0x3), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x31, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + UNCORE_EVENT_CONSTRAINT(0x35, 0x3), + UNCORE_EVENT_CONSTRAINT(0x36, 0x1), + UNCORE_EVENT_CONSTRAINT(0x37, 0x3), + UNCORE_EVENT_CONSTRAINT(0x38, 0x3), + UNCORE_EVENT_CONSTRAINT(0x39, 0x3), + UNCORE_EVENT_CONSTRAINT(0x3b, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct event_constraint snbep_uncore_r2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x12, 0x1), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x24, 0x3), + UNCORE_EVENT_CONSTRAINT(0x25, 0x3), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct event_constraint snbep_uncore_r3qpi_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x12, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x1), + UNCORE_EVENT_CONSTRAINT(0x20, 0x3), + UNCORE_EVENT_CONSTRAINT(0x21, 0x3), + UNCORE_EVENT_CONSTRAINT(0x22, 0x3), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x24, 0x3), + UNCORE_EVENT_CONSTRAINT(0x25, 0x3), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x28, 0x3), + UNCORE_EVENT_CONSTRAINT(0x29, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2a, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2b, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2c, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2e, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2f, 0x3), + UNCORE_EVENT_CONSTRAINT(0x30, 0x3), + UNCORE_EVENT_CONSTRAINT(0x31, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + UNCORE_EVENT_CONSTRAINT(0x36, 0x3), + UNCORE_EVENT_CONSTRAINT(0x37, 0x3), + UNCORE_EVENT_CONSTRAINT(0x38, 0x3), + UNCORE_EVENT_CONSTRAINT(0x39, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type snbep_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNBEP_U_MSR_PMON_CTR0, + .event_ctl = SNBEP_U_MSR_PMON_CTL0, + .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &snbep_uncore_msr_ops, + .format_group = &snbep_uncore_ubox_format_group, +}; + +static struct extra_reg snbep_uncore_cbox_extra_regs[] = { + SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, + SNBEP_CBO_PMON_CTL_TID_EN, 0x1), + SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6), + SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6), + SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0x6), + SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6), + SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x2), + EVENT_EXTRA_END +}; + +static void snbep_cbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct intel_uncore_extra_reg *er = &box->shared_regs[0]; + int i; + + if (uncore_box_is_fake(box)) + return; + + for (i = 0; i < 5; i++) { + if (reg1->alloc & (0x1 << i)) + atomic_sub(1 << (i * 6), &er->ref); + } + reg1->alloc = 0; +} + +static struct event_constraint * +__snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event, + u64 (*cbox_filter_mask)(int fields)) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct intel_uncore_extra_reg *er = &box->shared_regs[0]; + int i, alloc = 0; + unsigned long flags; + u64 mask; + + if (reg1->idx == EXTRA_REG_NONE) + return NULL; + + raw_spin_lock_irqsave(&er->lock, flags); + for (i = 0; i < 5; i++) { + if (!(reg1->idx & (0x1 << i))) + continue; + if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i))) + continue; + + mask = cbox_filter_mask(0x1 << i); + if (!__BITS_VALUE(atomic_read(&er->ref), i, 6) || + !((reg1->config ^ er->config) & mask)) { + atomic_add(1 << (i * 6), &er->ref); + er->config &= ~mask; + er->config |= reg1->config & mask; + alloc |= (0x1 << i); + } else { + break; + } + } + raw_spin_unlock_irqrestore(&er->lock, flags); + if (i < 5) + goto fail; + + if (!uncore_box_is_fake(box)) + reg1->alloc |= alloc; + + return NULL; +fail: + for (; i >= 0; i--) { + if (alloc & (0x1 << i)) + atomic_sub(1 << (i * 6), &er->ref); + } + return &uncore_constraint_empty; +} + +static u64 snbep_cbox_filter_mask(int fields) +{ + u64 mask = 0; + + if (fields & 0x1) + mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_TID; + if (fields & 0x2) + mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_NID; + if (fields & 0x4) + mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE; + if (fields & 0x8) + mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC; + + return mask; +} + +static struct event_constraint * +snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + return __snbep_cbox_get_constraint(box, event, snbep_cbox_filter_mask); +} + +static int snbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct extra_reg *er; + int idx = 0; + + for (er = snbep_uncore_cbox_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + idx |= er->idx; + } + + if (idx) { + reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + + SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & snbep_cbox_filter_mask(idx); + reg1->idx = idx; + } + return 0; +} + +static struct intel_uncore_ops snbep_uncore_cbox_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = snbep_cbox_hw_config, + .get_constraint = snbep_cbox_get_constraint, + .put_constraint = snbep_cbox_put_constraint, +}; + +static struct intel_uncore_type snbep_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 44, + .event_ctl = SNBEP_C0_MSR_PMON_CTL0, + .perf_ctr = SNBEP_C0_MSR_PMON_CTR0, + .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = SNBEP_CBO_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = snbep_uncore_cbox_constraints, + .ops = &snbep_uncore_cbox_ops, + .format_group = &snbep_uncore_cbox_format_group, +}; + +static u64 snbep_pcu_alter_er(struct perf_event *event, int new_idx, bool modify) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + u64 config = reg1->config; + + if (new_idx > reg1->idx) + config <<= 8 * (new_idx - reg1->idx); + else + config >>= 8 * (reg1->idx - new_idx); + + if (modify) { + hwc->config += new_idx - reg1->idx; + reg1->config = config; + reg1->idx = new_idx; + } + return config; +} + +static struct event_constraint * +snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct intel_uncore_extra_reg *er = &box->shared_regs[0]; + unsigned long flags; + int idx = reg1->idx; + u64 mask, config1 = reg1->config; + bool ok = false; + + if (reg1->idx == EXTRA_REG_NONE || + (!uncore_box_is_fake(box) && reg1->alloc)) + return NULL; +again: + mask = 0xffULL << (idx * 8); + raw_spin_lock_irqsave(&er->lock, flags); + if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) || + !((config1 ^ er->config) & mask)) { + atomic_add(1 << (idx * 8), &er->ref); + er->config &= ~mask; + er->config |= config1 & mask; + ok = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + if (!ok) { + idx = (idx + 1) % 4; + if (idx != reg1->idx) { + config1 = snbep_pcu_alter_er(event, idx, false); + goto again; + } + return &uncore_constraint_empty; + } + + if (!uncore_box_is_fake(box)) { + if (idx != reg1->idx) + snbep_pcu_alter_er(event, idx, true); + reg1->alloc = 1; + } + return NULL; +} + +static void snbep_pcu_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct intel_uncore_extra_reg *er = &box->shared_regs[0]; + + if (uncore_box_is_fake(box) || !reg1->alloc) + return; + + atomic_sub(1 << (reg1->idx * 8), &er->ref); + reg1->alloc = 0; +} + +static int snbep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK; + + if (ev_sel >= 0xb && ev_sel <= 0xe) { + reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; + reg1->idx = ev_sel - 0xb; + reg1->config = event->attr.config1 & (0xff << (reg1->idx * 8)); + } + return 0; +} + +static struct intel_uncore_ops snbep_uncore_pcu_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = snbep_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type snbep_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0, + .event_ctl = SNBEP_PCU_MSR_PMON_CTL0, + .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snbep_uncore_pcu_ops, + .format_group = &snbep_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *snbep_msr_uncores[] = { + &snbep_uncore_ubox, + &snbep_uncore_cbox, + &snbep_uncore_pcu, + NULL, +}; + +void snbep_uncore_cpu_init(void) +{ + if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + uncore_msr_uncores = snbep_msr_uncores; +} + +enum { + SNBEP_PCI_QPI_PORT0_FILTER, + SNBEP_PCI_QPI_PORT1_FILTER, +}; + +static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) { + reg1->idx = 0; + reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0; + reg1->config = event->attr.config1; + reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0; + reg2->config = event->attr.config2; + } + return 0; +} + +static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER; + struct pci_dev *filter_pdev = uncore_extra_pci_dev[box->phys_id][idx]; + if (filter_pdev) { + pci_write_config_dword(filter_pdev, reg1->reg, + (u32)reg1->config); + pci_write_config_dword(filter_pdev, reg1->reg + 4, + (u32)(reg1->config >> 32)); + pci_write_config_dword(filter_pdev, reg2->reg, + (u32)reg2->config); + pci_write_config_dword(filter_pdev, reg2->reg + 4, + (u32)(reg2->config >> 32)); + } + } + + pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops snbep_uncore_qpi_ops = { + SNBEP_UNCORE_PCI_OPS_COMMON_INIT(), + .enable_event = snbep_qpi_enable_event, + .hw_config = snbep_qpi_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +#define SNBEP_UNCORE_PCI_COMMON_INIT() \ + .perf_ctr = SNBEP_PCI_PMON_CTR0, \ + .event_ctl = SNBEP_PCI_PMON_CTL0, \ + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \ + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \ + .ops = &snbep_uncore_pci_ops, \ + .format_group = &snbep_uncore_format_group + +static struct intel_uncore_type snbep_uncore_ha = { + .name = "ha", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, + .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, + .event_descs = snbep_uncore_imc_events, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_qpi = { + .name = "qpi", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snbep_uncore_qpi_ops, + .event_descs = snbep_uncore_qpi_events, + .format_group = &snbep_uncore_qpi_format_group, +}; + + +static struct intel_uncore_type snbep_uncore_r2pcie = { + .name = "r2pcie", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 44, + .constraints = snbep_uncore_r2pcie_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_r3qpi = { + .name = "r3qpi", + .num_counters = 3, + .num_boxes = 2, + .perf_ctr_bits = 44, + .constraints = snbep_uncore_r3qpi_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +enum { + SNBEP_PCI_UNCORE_HA, + SNBEP_PCI_UNCORE_IMC, + SNBEP_PCI_UNCORE_QPI, + SNBEP_PCI_UNCORE_R2PCIE, + SNBEP_PCI_UNCORE_R3QPI, +}; + +static struct intel_uncore_type *snbep_pci_uncores[] = { + [SNBEP_PCI_UNCORE_HA] = &snbep_uncore_ha, + [SNBEP_PCI_UNCORE_IMC] = &snbep_uncore_imc, + [SNBEP_PCI_UNCORE_QPI] = &snbep_uncore_qpi, + [SNBEP_PCI_UNCORE_R2PCIE] = &snbep_uncore_r2pcie, + [SNBEP_PCI_UNCORE_R3QPI] = &snbep_uncore_r3qpi, + NULL, +}; + +static const struct pci_device_id snbep_uncore_pci_ids[] = { + { /* Home Agent */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0), + }, + { /* MC Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0), + }, + { /* MC Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1), + }, + { /* MC Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2), + }, + { /* MC Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3), + }, + { /* QPI Port 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0), + }, + { /* QPI Port 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1), + }, + { /* R2PCIe */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0), + }, + { /* R3QPI Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0), + }, + { /* R3QPI Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1), + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT0_FILTER), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT1_FILTER), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver snbep_uncore_pci_driver = { + .name = "snbep_uncore", + .id_table = snbep_uncore_pci_ids, +}; + +/* + * build pci bus to socket mapping + */ +static int snbep_pci2phy_map_init(int devid) +{ + struct pci_dev *ubox_dev = NULL; + int i, bus, nodeid; + int err = 0; + u32 config = 0; + + while (1) { + /* find the UBOX device */ + ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, ubox_dev); + if (!ubox_dev) + break; + bus = ubox_dev->bus->number; + /* get the Node ID of the local register */ + err = pci_read_config_dword(ubox_dev, 0x40, &config); + if (err) + break; + nodeid = config; + /* get the Node ID mapping */ + err = pci_read_config_dword(ubox_dev, 0x54, &config); + if (err) + break; + /* + * every three bits in the Node ID mapping register maps + * to a particular node. + */ + for (i = 0; i < 8; i++) { + if (nodeid == ((config >> (3 * i)) & 0x7)) { + uncore_pcibus_to_physid[bus] = i; + break; + } + } + } + + if (!err) { + /* + * For PCI bus with no UBOX device, find the next bus + * that has UBOX device and use its mapping. + */ + i = -1; + for (bus = 255; bus >= 0; bus--) { + if (uncore_pcibus_to_physid[bus] >= 0) + i = uncore_pcibus_to_physid[bus]; + else + uncore_pcibus_to_physid[bus] = i; + } + } + + if (ubox_dev) + pci_dev_put(ubox_dev); + + return err ? pcibios_err_to_errno(err) : 0; +} + +int snbep_uncore_pci_init(void) +{ + int ret = snbep_pci2phy_map_init(0x3ce0); + if (ret) + return ret; + uncore_pci_uncores = snbep_pci_uncores; + uncore_pci_driver = &snbep_uncore_pci_driver; + return 0; +} +/* end of Sandy Bridge-EP uncore support */ + +/* IvyTown uncore support */ +static void ivbep_uncore_msr_init_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + if (msr) + wrmsrl(msr, IVBEP_PMON_BOX_CTL_INT); +} + +static void ivbep_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + + pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT); +} + +#define IVBEP_UNCORE_MSR_OPS_COMMON_INIT() \ + .init_box = ivbep_uncore_msr_init_box, \ + .disable_box = snbep_uncore_msr_disable_box, \ + .enable_box = snbep_uncore_msr_enable_box, \ + .disable_event = snbep_uncore_msr_disable_event, \ + .enable_event = snbep_uncore_msr_enable_event, \ + .read_counter = uncore_msr_read_counter + +static struct intel_uncore_ops ivbep_uncore_msr_ops = { + IVBEP_UNCORE_MSR_OPS_COMMON_INIT(), +}; + +static struct intel_uncore_ops ivbep_uncore_pci_ops = { + .init_box = ivbep_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +#define IVBEP_UNCORE_PCI_COMMON_INIT() \ + .perf_ctr = SNBEP_PCI_PMON_CTR0, \ + .event_ctl = SNBEP_PCI_PMON_CTL0, \ + .event_mask = IVBEP_PMON_RAW_EVENT_MASK, \ + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \ + .ops = &ivbep_uncore_pci_ops, \ + .format_group = &ivbep_uncore_format_group + +static struct attribute *ivbep_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute *ivbep_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + NULL, +}; + +static struct attribute *ivbep_uncore_cbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid.attr, + &format_attr_filter_link.attr, + &format_attr_filter_state2.attr, + &format_attr_filter_nid2.attr, + &format_attr_filter_opc2.attr, + &format_attr_filter_nc.attr, + &format_attr_filter_c6.attr, + &format_attr_filter_isoc.attr, + NULL, +}; + +static struct attribute *ivbep_uncore_pcu_formats_attr[] = { + &format_attr_event_ext.attr, + &format_attr_occ_sel.attr, + &format_attr_edge.attr, + &format_attr_thresh5.attr, + &format_attr_occ_invert.attr, + &format_attr_occ_edge.attr, + &format_attr_filter_band0.attr, + &format_attr_filter_band1.attr, + &format_attr_filter_band2.attr, + &format_attr_filter_band3.attr, + NULL, +}; + +static struct attribute *ivbep_uncore_qpi_formats_attr[] = { + &format_attr_event_ext.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_thresh8.attr, + &format_attr_match_rds.attr, + &format_attr_match_rnid30.attr, + &format_attr_match_rnid4.attr, + &format_attr_match_dnid.attr, + &format_attr_match_mc.attr, + &format_attr_match_opc.attr, + &format_attr_match_vnw.attr, + &format_attr_match0.attr, + &format_attr_match1.attr, + &format_attr_mask_rds.attr, + &format_attr_mask_rnid30.attr, + &format_attr_mask_rnid4.attr, + &format_attr_mask_dnid.attr, + &format_attr_mask_mc.attr, + &format_attr_mask_opc.attr, + &format_attr_mask_vnw.attr, + &format_attr_mask0.attr, + &format_attr_mask1.attr, + NULL, +}; + +static struct attribute_group ivbep_uncore_format_group = { + .name = "format", + .attrs = ivbep_uncore_formats_attr, +}; + +static struct attribute_group ivbep_uncore_ubox_format_group = { + .name = "format", + .attrs = ivbep_uncore_ubox_formats_attr, +}; + +static struct attribute_group ivbep_uncore_cbox_format_group = { + .name = "format", + .attrs = ivbep_uncore_cbox_formats_attr, +}; + +static struct attribute_group ivbep_uncore_pcu_format_group = { + .name = "format", + .attrs = ivbep_uncore_pcu_formats_attr, +}; + +static struct attribute_group ivbep_uncore_qpi_format_group = { + .name = "format", + .attrs = ivbep_uncore_qpi_formats_attr, +}; + +static struct intel_uncore_type ivbep_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNBEP_U_MSR_PMON_CTR0, + .event_ctl = SNBEP_U_MSR_PMON_CTL0, + .event_mask = IVBEP_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_ubox_format_group, +}; + +static struct extra_reg ivbep_uncore_cbox_extra_regs[] = { + SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, + SNBEP_CBO_PMON_CTL_TID_EN, 0x1), + SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8), + EVENT_EXTRA_END +}; + +static u64 ivbep_cbox_filter_mask(int fields) +{ + u64 mask = 0; + + if (fields & 0x1) + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_TID; + if (fields & 0x2) + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK; + if (fields & 0x4) + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE; + if (fields & 0x8) + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NID; + if (fields & 0x10) { + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC; + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NC; + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_C6; + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC; + } + + return mask; +} + +static struct event_constraint * +ivbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + return __snbep_cbox_get_constraint(box, event, ivbep_cbox_filter_mask); +} + +static int ivbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct extra_reg *er; + int idx = 0; + + for (er = ivbep_uncore_cbox_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + idx |= er->idx; + } + + if (idx) { + reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + + SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & ivbep_cbox_filter_mask(idx); + reg1->idx = idx; + } + return 0; +} + +static void ivbep_cbox_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + u64 filter = uncore_shared_reg_config(box, 0); + wrmsrl(reg1->reg, filter & 0xffffffff); + wrmsrl(reg1->reg + 6, filter >> 32); + } + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops ivbep_uncore_cbox_ops = { + .init_box = ivbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = ivbep_cbox_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = ivbep_cbox_hw_config, + .get_constraint = ivbep_cbox_get_constraint, + .put_constraint = snbep_cbox_put_constraint, +}; + +static struct intel_uncore_type ivbep_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 15, + .perf_ctr_bits = 44, + .event_ctl = SNBEP_C0_MSR_PMON_CTL0, + .perf_ctr = SNBEP_C0_MSR_PMON_CTR0, + .event_mask = IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = SNBEP_CBO_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = snbep_uncore_cbox_constraints, + .ops = &ivbep_uncore_cbox_ops, + .format_group = &ivbep_uncore_cbox_format_group, +}; + +static struct intel_uncore_ops ivbep_uncore_pcu_ops = { + IVBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = snbep_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type ivbep_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0, + .event_ctl = SNBEP_PCU_MSR_PMON_CTL0, + .event_mask = IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &ivbep_uncore_pcu_ops, + .format_group = &ivbep_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *ivbep_msr_uncores[] = { + &ivbep_uncore_ubox, + &ivbep_uncore_cbox, + &ivbep_uncore_pcu, + NULL, +}; + +void ivbep_uncore_cpu_init(void) +{ + if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + uncore_msr_uncores = ivbep_msr_uncores; +} + +static struct intel_uncore_type ivbep_uncore_ha = { + .name = "ha", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + IVBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type ivbep_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, + .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, + .event_descs = snbep_uncore_imc_events, + IVBEP_UNCORE_PCI_COMMON_INIT(), +}; + +/* registers in IRP boxes are not properly aligned */ +static unsigned ivbep_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4}; +static unsigned ivbep_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0}; + +static void ivbep_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx], + hwc->config | SNBEP_PMON_CTL_EN); +} + +static void ivbep_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx], hwc->config); +} + +static u64 ivbep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + u64 count = 0; + + pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx], (u32 *)&count); + pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1); + + return count; +} + +static struct intel_uncore_ops ivbep_uncore_irp_ops = { + .init_box = ivbep_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = ivbep_uncore_irp_disable_event, + .enable_event = ivbep_uncore_irp_enable_event, + .read_counter = ivbep_uncore_irp_read_counter, +}; + +static struct intel_uncore_type ivbep_uncore_irp = { + .name = "irp", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_mask = IVBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &ivbep_uncore_irp_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static struct intel_uncore_ops ivbep_uncore_qpi_ops = { + .init_box = ivbep_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_qpi_enable_event, + .read_counter = snbep_uncore_pci_read_counter, + .hw_config = snbep_qpi_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct intel_uncore_type ivbep_uncore_qpi = { + .name = "qpi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &ivbep_uncore_qpi_ops, + .format_group = &ivbep_uncore_qpi_format_group, +}; + +static struct intel_uncore_type ivbep_uncore_r2pcie = { + .name = "r2pcie", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 44, + .constraints = snbep_uncore_r2pcie_constraints, + IVBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type ivbep_uncore_r3qpi = { + .name = "r3qpi", + .num_counters = 3, + .num_boxes = 2, + .perf_ctr_bits = 44, + .constraints = snbep_uncore_r3qpi_constraints, + IVBEP_UNCORE_PCI_COMMON_INIT(), +}; + +enum { + IVBEP_PCI_UNCORE_HA, + IVBEP_PCI_UNCORE_IMC, + IVBEP_PCI_UNCORE_IRP, + IVBEP_PCI_UNCORE_QPI, + IVBEP_PCI_UNCORE_R2PCIE, + IVBEP_PCI_UNCORE_R3QPI, +}; + +static struct intel_uncore_type *ivbep_pci_uncores[] = { + [IVBEP_PCI_UNCORE_HA] = &ivbep_uncore_ha, + [IVBEP_PCI_UNCORE_IMC] = &ivbep_uncore_imc, + [IVBEP_PCI_UNCORE_IRP] = &ivbep_uncore_irp, + [IVBEP_PCI_UNCORE_QPI] = &ivbep_uncore_qpi, + [IVBEP_PCI_UNCORE_R2PCIE] = &ivbep_uncore_r2pcie, + [IVBEP_PCI_UNCORE_R3QPI] = &ivbep_uncore_r3qpi, + NULL, +}; + +static const struct pci_device_id ivbep_uncore_pci_ids[] = { + { /* Home Agent 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 0), + }, + { /* Home Agent 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 1), + }, + { /* MC0 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 0), + }, + { /* MC0 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 1), + }, + { /* MC0 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 2), + }, + { /* MC0 Channel 4 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 3), + }, + { /* MC1 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 4), + }, + { /* MC1 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 5), + }, + { /* MC1 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 6), + }, + { /* MC1 Channel 4 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 7), + }, + { /* IRP */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IRP, 0), + }, + { /* QPI0 Port 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 0), + }, + { /* QPI0 Port 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 1), + }, + { /* QPI1 Port 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 2), + }, + { /* R2PCIe */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R2PCIE, 0), + }, + { /* R3QPI0 Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 0), + }, + { /* R3QPI0 Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 1), + }, + { /* R3QPI1 Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e), + .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 2), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT0_FILTER), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT1_FILTER), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver ivbep_uncore_pci_driver = { + .name = "ivbep_uncore", + .id_table = ivbep_uncore_pci_ids, +}; + +int ivbep_uncore_pci_init(void) +{ + int ret = snbep_pci2phy_map_init(0x0e1e); + if (ret) + return ret; + uncore_pci_uncores = ivbep_pci_uncores; + uncore_pci_driver = &ivbep_uncore_pci_driver; + return 0; +} +/* end of IvyTown uncore support */ + +/* Haswell-EP uncore support */ +static struct attribute *hswep_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + &format_attr_filter_tid2.attr, + &format_attr_filter_cid.attr, + NULL, +}; + +static struct attribute_group hswep_uncore_ubox_format_group = { + .name = "format", + .attrs = hswep_uncore_ubox_formats_attr, +}; + +static int hswep_ubox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + reg1->reg = HSWEP_U_MSR_PMON_FILTER; + reg1->config = event->attr.config1 & HSWEP_U_MSR_PMON_BOX_FILTER_MASK; + reg1->idx = 0; + return 0; +} + +static struct intel_uncore_ops hswep_uncore_ubox_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = hswep_ubox_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct intel_uncore_type hswep_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = HSWEP_U_MSR_PMON_CTR0, + .event_ctl = HSWEP_U_MSR_PMON_CTL0, + .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL, + .num_shared_regs = 1, + .ops = &hswep_uncore_ubox_ops, + .format_group = &hswep_uncore_ubox_format_group, +}; + +static struct attribute *hswep_uncore_cbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid3.attr, + &format_attr_filter_link2.attr, + &format_attr_filter_state3.attr, + &format_attr_filter_nid2.attr, + &format_attr_filter_opc2.attr, + &format_attr_filter_nc.attr, + &format_attr_filter_c6.attr, + &format_attr_filter_isoc.attr, + NULL, +}; + +static struct attribute_group hswep_uncore_cbox_format_group = { + .name = "format", + .attrs = hswep_uncore_cbox_formats_attr, +}; + +static struct event_constraint hswep_uncore_cbox_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x01, 0x1), + UNCORE_EVENT_CONSTRAINT(0x09, 0x1), + UNCORE_EVENT_CONSTRAINT(0x11, 0x1), + UNCORE_EVENT_CONSTRAINT(0x36, 0x1), + UNCORE_EVENT_CONSTRAINT(0x38, 0x3), + UNCORE_EVENT_CONSTRAINT(0x3b, 0x1), + UNCORE_EVENT_CONSTRAINT(0x3e, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct extra_reg hswep_uncore_cbox_extra_regs[] = { + SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, + SNBEP_CBO_PMON_CTL_TID_EN, 0x1), + SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x2134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4028, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4032, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4029, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4033, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x402A, 0x40ff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x12), + SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8), + EVENT_EXTRA_END +}; + +static u64 hswep_cbox_filter_mask(int fields) +{ + u64 mask = 0; + if (fields & 0x1) + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_TID; + if (fields & 0x2) + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK; + if (fields & 0x4) + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE; + if (fields & 0x8) + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NID; + if (fields & 0x10) { + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC; + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NC; + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_C6; + mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC; + } + return mask; +} + +static struct event_constraint * +hswep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + return __snbep_cbox_get_constraint(box, event, hswep_cbox_filter_mask); +} + +static int hswep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct extra_reg *er; + int idx = 0; + + for (er = hswep_uncore_cbox_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + idx |= er->idx; + } + + if (idx) { + reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 + + HSWEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & hswep_cbox_filter_mask(idx); + reg1->idx = idx; + } + return 0; +} + +static void hswep_cbox_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + u64 filter = uncore_shared_reg_config(box, 0); + wrmsrl(reg1->reg, filter & 0xffffffff); + wrmsrl(reg1->reg + 1, filter >> 32); + } + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops hswep_uncore_cbox_ops = { + .init_box = snbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = hswep_cbox_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = hswep_cbox_hw_config, + .get_constraint = hswep_cbox_get_constraint, + .put_constraint = snbep_cbox_put_constraint, +}; + +static struct intel_uncore_type hswep_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 18, + .perf_ctr_bits = 44, + .event_ctl = HSWEP_C0_MSR_PMON_CTL0, + .perf_ctr = HSWEP_C0_MSR_PMON_CTR0, + .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_CBO_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = hswep_uncore_cbox_constraints, + .ops = &hswep_uncore_cbox_ops, + .format_group = &hswep_uncore_cbox_format_group, +}; + +static struct attribute *hswep_uncore_sbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute_group hswep_uncore_sbox_format_group = { + .name = "format", + .attrs = hswep_uncore_sbox_formats_attr, +}; + +static struct intel_uncore_type hswep_uncore_sbox = { + .name = "sbox", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 44, + .event_ctl = HSWEP_S0_MSR_PMON_CTL0, + .perf_ctr = HSWEP_S0_MSR_PMON_CTR0, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_SBOX_MSR_OFFSET, + .ops = &snbep_uncore_msr_ops, + .format_group = &hswep_uncore_sbox_format_group, +}; + +static int hswep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK; + + if (ev_sel >= 0xb && ev_sel <= 0xe) { + reg1->reg = HSWEP_PCU_MSR_PMON_BOX_FILTER; + reg1->idx = ev_sel - 0xb; + reg1->config = event->attr.config1 & (0xff << reg1->idx); + } + return 0; +} + +static struct intel_uncore_ops hswep_uncore_pcu_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = hswep_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type hswep_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = HSWEP_PCU_MSR_PMON_CTR0, + .event_ctl = HSWEP_PCU_MSR_PMON_CTL0, + .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &hswep_uncore_pcu_ops, + .format_group = &snbep_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *hswep_msr_uncores[] = { + &hswep_uncore_ubox, + &hswep_uncore_cbox, + &hswep_uncore_sbox, + &hswep_uncore_pcu, + NULL, +}; + +void hswep_uncore_cpu_init(void) +{ + if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + uncore_msr_uncores = hswep_msr_uncores; +} + +static struct intel_uncore_type hswep_uncore_ha = { + .name = "ha", + .num_counters = 5, + .num_boxes = 2, + .perf_ctr_bits = 48, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct uncore_event_desc hswep_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x00,umask=0x00"), + INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"), + INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type hswep_uncore_imc = { + .name = "imc", + .num_counters = 5, + .num_boxes = 8, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, + .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, + .event_descs = hswep_uncore_imc_events, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_ops hswep_uncore_irp_ops = { + .init_box = snbep_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = ivbep_uncore_irp_disable_event, + .enable_event = ivbep_uncore_irp_enable_event, + .read_counter = ivbep_uncore_irp_read_counter, +}; + +static struct intel_uncore_type hswep_uncore_irp = { + .name = "irp", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &hswep_uncore_irp_ops, + .format_group = &snbep_uncore_format_group, +}; + +static struct intel_uncore_type hswep_uncore_qpi = { + .name = "qpi", + .num_counters = 5, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snbep_uncore_qpi_ops, + .format_group = &snbep_uncore_qpi_format_group, +}; + +static struct event_constraint hswep_uncore_r2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x1), + UNCORE_EVENT_CONSTRAINT(0x23, 0x1), + UNCORE_EVENT_CONSTRAINT(0x24, 0x1), + UNCORE_EVENT_CONSTRAINT(0x25, 0x1), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x27, 0x1), + UNCORE_EVENT_CONSTRAINT(0x28, 0x3), + UNCORE_EVENT_CONSTRAINT(0x29, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2a, 0x1), + UNCORE_EVENT_CONSTRAINT(0x2b, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2c, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + UNCORE_EVENT_CONSTRAINT(0x35, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type hswep_uncore_r2pcie = { + .name = "r2pcie", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .constraints = hswep_uncore_r2pcie_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct event_constraint hswep_uncore_r3qpi_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x01, 0x3), + UNCORE_EVENT_CONSTRAINT(0x07, 0x7), + UNCORE_EVENT_CONSTRAINT(0x08, 0x7), + UNCORE_EVENT_CONSTRAINT(0x09, 0x7), + UNCORE_EVENT_CONSTRAINT(0x0a, 0x7), + UNCORE_EVENT_CONSTRAINT(0x0e, 0x7), + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x12, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x1), + UNCORE_EVENT_CONSTRAINT(0x14, 0x3), + UNCORE_EVENT_CONSTRAINT(0x15, 0x3), + UNCORE_EVENT_CONSTRAINT(0x1f, 0x3), + UNCORE_EVENT_CONSTRAINT(0x20, 0x3), + UNCORE_EVENT_CONSTRAINT(0x21, 0x3), + UNCORE_EVENT_CONSTRAINT(0x22, 0x3), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x25, 0x3), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x28, 0x3), + UNCORE_EVENT_CONSTRAINT(0x29, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2c, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2e, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2f, 0x3), + UNCORE_EVENT_CONSTRAINT(0x31, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + UNCORE_EVENT_CONSTRAINT(0x36, 0x3), + UNCORE_EVENT_CONSTRAINT(0x37, 0x3), + UNCORE_EVENT_CONSTRAINT(0x38, 0x3), + UNCORE_EVENT_CONSTRAINT(0x39, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type hswep_uncore_r3qpi = { + .name = "r3qpi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 44, + .constraints = hswep_uncore_r3qpi_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +enum { + HSWEP_PCI_UNCORE_HA, + HSWEP_PCI_UNCORE_IMC, + HSWEP_PCI_UNCORE_IRP, + HSWEP_PCI_UNCORE_QPI, + HSWEP_PCI_UNCORE_R2PCIE, + HSWEP_PCI_UNCORE_R3QPI, +}; + +static struct intel_uncore_type *hswep_pci_uncores[] = { + [HSWEP_PCI_UNCORE_HA] = &hswep_uncore_ha, + [HSWEP_PCI_UNCORE_IMC] = &hswep_uncore_imc, + [HSWEP_PCI_UNCORE_IRP] = &hswep_uncore_irp, + [HSWEP_PCI_UNCORE_QPI] = &hswep_uncore_qpi, + [HSWEP_PCI_UNCORE_R2PCIE] = &hswep_uncore_r2pcie, + [HSWEP_PCI_UNCORE_R3QPI] = &hswep_uncore_r3qpi, + NULL, +}; + +static DEFINE_PCI_DEVICE_TABLE(hswep_uncore_pci_ids) = { + { /* Home Agent 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f30), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 0), + }, + { /* Home Agent 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f38), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 1), + }, + { /* MC0 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb0), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 0), + }, + { /* MC0 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb1), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 1), + }, + { /* MC0 Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb4), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 2), + }, + { /* MC0 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb5), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 3), + }, + { /* MC1 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd0), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 4), + }, + { /* MC1 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd1), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 5), + }, + { /* MC1 Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd4), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 6), + }, + { /* MC1 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd5), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 7), + }, + { /* IRP */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f39), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IRP, 0), + }, + { /* QPI0 Port 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f32), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 0), + }, + { /* QPI0 Port 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f33), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 1), + }, + { /* QPI1 Port 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3a), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 2), + }, + { /* R2PCIe */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f34), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R2PCIE, 0), + }, + { /* R3QPI0 Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f36), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 0), + }, + { /* R3QPI0 Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f37), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 1), + }, + { /* R3QPI1 Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3e), + .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 2), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f86), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT0_FILTER), + }, + { /* QPI Port 1 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f96), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT1_FILTER), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver hswep_uncore_pci_driver = { + .name = "hswep_uncore", + .id_table = hswep_uncore_pci_ids, +}; + +int hswep_uncore_pci_init(void) +{ + int ret = snbep_pci2phy_map_init(0x2f1e); + if (ret) + return ret; + uncore_pci_uncores = hswep_pci_uncores; + uncore_pci_driver = &hswep_uncore_pci_driver; + return 0; +} +/* end of Haswell-EP uncore support */ diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 988c00a1f60d..49f886481615 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -682,15 +682,14 @@ void __init parse_e820_ext(u64 phys_addr, u32 data_len) * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). * * This function requires the e820 map to be sorted and without any - * overlapping entries and assumes the first e820 area to be RAM. + * overlapping entries. */ void __init e820_mark_nosave_regions(unsigned long limit_pfn) { int i; - unsigned long pfn; + unsigned long pfn = 0; - pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); - for (i = 1; i < e820.nr_map; i++) { + for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; if (pfn < PFN_UP(ei->addr)) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 2fac1343a90b..df088bb03fb3 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -404,8 +404,8 @@ GLOBAL(system_call_after_swapgs) * and short: */ ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_ARGS 8,0 - movq %rax,ORIG_RAX-ARGOFFSET(%rsp) + SAVE_ARGS 8, 0, rax_enosys=1 + movq_cfi rax,(ORIG_RAX-ARGOFFSET) movq %rcx,RIP-ARGOFFSET(%rsp) CFI_REL_OFFSET rip,RIP-ARGOFFSET testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) @@ -417,7 +417,7 @@ system_call_fastpath: andl $__SYSCALL_MASK,%eax cmpl $__NR_syscall_max,%eax #endif - ja badsys + ja ret_from_sys_call /* and return regs->ax */ movq %r10,%rcx call *sys_call_table(,%rax,8) # XXX: rip relative movq %rax,RAX-ARGOFFSET(%rsp) @@ -476,28 +476,8 @@ sysret_signal: FIXUP_TOP_OF_STACK %r11, -ARGOFFSET jmp int_check_syscall_exit_work -badsys: - movq $-ENOSYS,RAX-ARGOFFSET(%rsp) - jmp ret_from_sys_call - #ifdef CONFIG_AUDITSYSCALL /* - * Fast path for syscall audit without full syscall trace. - * We just call __audit_syscall_entry() directly, and then - * jump back to the normal fast path. - */ -auditsys: - movq %r10,%r9 /* 6th arg: 4th syscall arg */ - movq %rdx,%r8 /* 5th arg: 3rd syscall arg */ - movq %rsi,%rcx /* 4th arg: 2nd syscall arg */ - movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ - movq %rax,%rsi /* 2nd arg: syscall number */ - movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ - call __audit_syscall_entry - LOAD_ARGS 0 /* reload call-clobbered registers */ - jmp system_call_fastpath - - /* * Return fast path for syscall audit. Call __audit_syscall_exit() * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT * masked off. @@ -514,18 +494,25 @@ sysret_audit: /* Do syscall tracing */ tracesys: -#ifdef CONFIG_AUDITSYSCALL - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) - jz auditsys -#endif + leaq -REST_SKIP(%rsp), %rdi + movq $AUDIT_ARCH_X86_64, %rsi + call syscall_trace_enter_phase1 + test %rax, %rax + jnz tracesys_phase2 /* if needed, run the slow path */ + LOAD_ARGS 0 /* else restore clobbered regs */ + jmp system_call_fastpath /* and return to the fast path */ + +tracesys_phase2: SAVE_REST - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ FIXUP_TOP_OF_STACK %rdi - movq %rsp,%rdi - call syscall_trace_enter + movq %rsp, %rdi + movq $AUDIT_ARCH_X86_64, %rsi + movq %rax,%rdx + call syscall_trace_enter_phase2 + /* * Reload arg registers from stack in case ptrace changed them. - * We don't reload %rax because syscall_trace_enter() returned + * We don't reload %rax because syscall_trace_entry_phase2() returned * the value it wants us to use in the table lookup. */ LOAD_ARGS ARGOFFSET, 1 @@ -536,7 +523,7 @@ tracesys: andl $__SYSCALL_MASK,%eax cmpl $__NR_syscall_max,%eax #endif - ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ + ja int_ret_from_sys_call /* RAX(%rsp) is already set */ movq %r10,%rcx /* fixup for C */ call *sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) diff --git a/arch/x86/kernel/iosf_mbi.c b/arch/x86/kernel/iosf_mbi.c index 9030e83db6ee..82f8d02f0df2 100644 --- a/arch/x86/kernel/iosf_mbi.c +++ b/arch/x86/kernel/iosf_mbi.c @@ -22,10 +22,13 @@ #include <linux/init.h> #include <linux/spinlock.h> #include <linux/pci.h> +#include <linux/debugfs.h> +#include <linux/capability.h> #include <asm/iosf_mbi.h> #define PCI_DEVICE_ID_BAYTRAIL 0x0F00 +#define PCI_DEVICE_ID_BRASWELL 0x2280 #define PCI_DEVICE_ID_QUARK_X1000 0x0958 static DEFINE_SPINLOCK(iosf_mbi_lock); @@ -187,6 +190,89 @@ bool iosf_mbi_available(void) } EXPORT_SYMBOL(iosf_mbi_available); +#ifdef CONFIG_IOSF_MBI_DEBUG +static u32 dbg_mdr; +static u32 dbg_mcr; +static u32 dbg_mcrx; + +static int mcr_get(void *data, u64 *val) +{ + *val = *(u32 *)data; + return 0; +} + +static int mcr_set(void *data, u64 val) +{ + u8 command = ((u32)val & 0xFF000000) >> 24, + port = ((u32)val & 0x00FF0000) >> 16, + offset = ((u32)val & 0x0000FF00) >> 8; + int err; + + *(u32 *)data = val; + + if (!capable(CAP_SYS_RAWIO)) + return -EACCES; + + if (command & 1u) + err = iosf_mbi_write(port, + command, + dbg_mcrx | offset, + dbg_mdr); + else + err = iosf_mbi_read(port, + command, + dbg_mcrx | offset, + &dbg_mdr); + + return err; +} +DEFINE_SIMPLE_ATTRIBUTE(iosf_mcr_fops, mcr_get, mcr_set , "%llx\n"); + +static struct dentry *iosf_dbg; + +static void iosf_sideband_debug_init(void) +{ + struct dentry *d; + + iosf_dbg = debugfs_create_dir("iosf_sb", NULL); + if (IS_ERR_OR_NULL(iosf_dbg)) + return; + + /* mdr */ + d = debugfs_create_x32("mdr", 0660, iosf_dbg, &dbg_mdr); + if (IS_ERR_OR_NULL(d)) + goto cleanup; + + /* mcrx */ + debugfs_create_x32("mcrx", 0660, iosf_dbg, &dbg_mcrx); + if (IS_ERR_OR_NULL(d)) + goto cleanup; + + /* mcr - initiates mailbox tranaction */ + debugfs_create_file("mcr", 0660, iosf_dbg, &dbg_mcr, &iosf_mcr_fops); + if (IS_ERR_OR_NULL(d)) + goto cleanup; + + return; + +cleanup: + debugfs_remove_recursive(d); +} + +static void iosf_debugfs_init(void) +{ + iosf_sideband_debug_init(); +} + +static void iosf_debugfs_remove(void) +{ + debugfs_remove_recursive(iosf_dbg); +} +#else +static inline void iosf_debugfs_init(void) { } +static inline void iosf_debugfs_remove(void) { } +#endif /* CONFIG_IOSF_MBI_DEBUG */ + static int iosf_mbi_probe(struct pci_dev *pdev, const struct pci_device_id *unused) { @@ -204,6 +290,7 @@ static int iosf_mbi_probe(struct pci_dev *pdev, static const struct pci_device_id iosf_mbi_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BAYTRAIL) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BRASWELL) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_QUARK_X1000) }, { 0, }, }; @@ -217,11 +304,15 @@ static struct pci_driver iosf_mbi_pci_driver = { static int __init iosf_mbi_init(void) { + iosf_debugfs_init(); + return pci_register_driver(&iosf_mbi_pci_driver); } static void __exit iosf_mbi_exit(void) { + iosf_debugfs_remove(); + pci_unregister_driver(&iosf_mbi_pci_driver); if (mbi_pdev) { pci_dev_put(mbi_pdev); diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 1667b1de8d5d..72e8e310258d 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -247,7 +247,8 @@ void machine_kexec(struct kimage *image) /* now call it */ image->start = relocate_kernel_ptr((unsigned long)image->head, (unsigned long)page_list, - image->start, cpu_has_pae, + image->start, + boot_cpu_has(X86_FEATURE_PAE), image->preserve_context); #ifdef CONFIG_KEXEC_JUMP diff --git a/arch/x86/kernel/pmc_atom.c b/arch/x86/kernel/pmc_atom.c index 0c424a67985d..0ee5025e0fa4 100644 --- a/arch/x86/kernel/pmc_atom.c +++ b/arch/x86/kernel/pmc_atom.c @@ -235,6 +235,11 @@ err: pmc_dbgfs_unregister(pmc); return -ENODEV; } +#else +static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) +{ + return 0; +} #endif /* CONFIG_DEBUG_FS */ static int pmc_setup_dev(struct pci_dev *pdev) @@ -262,14 +267,12 @@ static int pmc_setup_dev(struct pci_dev *pdev) /* PMC hardware registers setup */ pmc_hw_reg_setup(pmc); -#ifdef CONFIG_DEBUG_FS ret = pmc_dbgfs_register(pmc, pdev); if (ret) { iounmap(pmc->regmap); - return ret; } -#endif /* CONFIG_DEBUG_FS */ - return 0; + + return ret; } /* diff --git a/arch/x86/kernel/preempt.S b/arch/x86/kernel/preempt.S deleted file mode 100644 index ca7f0d58a87d..000000000000 --- a/arch/x86/kernel/preempt.S +++ /dev/null @@ -1,25 +0,0 @@ - -#include <linux/linkage.h> -#include <asm/dwarf2.h> -#include <asm/asm.h> -#include <asm/calling.h> - -ENTRY(___preempt_schedule) - CFI_STARTPROC - SAVE_ALL - call preempt_schedule - RESTORE_ALL - ret - CFI_ENDPROC - -#ifdef CONFIG_CONTEXT_TRACKING - -ENTRY(___preempt_schedule_context) - CFI_STARTPROC - SAVE_ALL - call preempt_schedule_context - RESTORE_ALL - ret - CFI_ENDPROC - -#endif diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index f804dc935d2a..e127ddaa2d5a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -64,14 +64,16 @@ EXPORT_SYMBOL_GPL(task_xstate_cachep); */ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - int ret; - *dst = *src; - if (fpu_allocated(&src->thread.fpu)) { - memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); - ret = fpu_alloc(&dst->thread.fpu); - if (ret) - return ret; + + dst->thread.fpu_counter = 0; + dst->thread.fpu.has_fpu = 0; + dst->thread.fpu.last_cpu = ~0; + dst->thread.fpu.state = NULL; + if (tsk_used_math(src)) { + int err = fpu_alloc(&dst->thread.fpu); + if (err) + return err; fpu_copy(dst, src); } return 0; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 7bc86bbe7485..8f3ebfe710d0 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -138,6 +138,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.sp = (unsigned long) childregs; p->thread.sp0 = (unsigned long) (childregs+1); + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ @@ -152,9 +153,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, childregs->orig_ax = -1; childregs->cs = __KERNEL_CS | get_kernel_rpl(); childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; - p->thread.fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); return 0; } *childregs = *current_pt_regs(); @@ -165,13 +164,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.ip = (unsigned long) ret_from_fork; task_user_gs(p) = get_user_gs(current_pt_regs()); - p->thread.fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; tsk = current; err = -ENOMEM; - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); - if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ca5b02d405c3..3ed4a68d4013 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -163,7 +163,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.sp = (unsigned long) childregs; p->thread.usersp = me->thread.usersp; set_tsk_thread_flag(p, TIF_FORK); - p->thread.fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); @@ -193,8 +192,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, childregs->sp = sp; err = -ENOMEM; - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); - if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 678c0ada3b3c..29576c244699 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1441,24 +1441,126 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, force_sig_info(SIGTRAP, &info, tsk); } - -#ifdef CONFIG_X86_32 -# define IS_IA32 1 -#elif defined CONFIG_IA32_EMULATION -# define IS_IA32 is_compat_task() -#else -# define IS_IA32 0 +static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) +{ +#ifdef CONFIG_X86_64 + if (arch == AUDIT_ARCH_X86_64) { + audit_syscall_entry(arch, regs->orig_ax, regs->di, + regs->si, regs->dx, regs->r10); + } else #endif + { + audit_syscall_entry(arch, regs->orig_ax, regs->bx, + regs->cx, regs->dx, regs->si); + } +} /* - * We must return the syscall number to actually look up in the table. - * This can be -1L to skip running any syscall at all. + * We can return 0 to resume the syscall or anything else to go to phase + * 2. If we resume the syscall, we need to put something appropriate in + * regs->orig_ax. + * + * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax + * are fully functional. + * + * For phase 2's benefit, our return value is: + * 0: resume the syscall + * 1: go to phase 2; no seccomp phase 2 needed + * anything else: go to phase 2; pass return value to seccomp */ -long syscall_trace_enter(struct pt_regs *regs) +unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) +{ + unsigned long ret = 0; + u32 work; + + BUG_ON(regs != task_pt_regs(current)); + + work = ACCESS_ONCE(current_thread_info()->flags) & + _TIF_WORK_SYSCALL_ENTRY; + + /* + * If TIF_NOHZ is set, we are required to call user_exit() before + * doing anything that could touch RCU. + */ + if (work & _TIF_NOHZ) { + user_exit(); + work &= ~TIF_NOHZ; + } + +#ifdef CONFIG_SECCOMP + /* + * Do seccomp first -- it should minimize exposure of other + * code, and keeping seccomp fast is probably more valuable + * than the rest of this. + */ + if (work & _TIF_SECCOMP) { + struct seccomp_data sd; + + sd.arch = arch; + sd.nr = regs->orig_ax; + sd.instruction_pointer = regs->ip; +#ifdef CONFIG_X86_64 + if (arch == AUDIT_ARCH_X86_64) { + sd.args[0] = regs->di; + sd.args[1] = regs->si; + sd.args[2] = regs->dx; + sd.args[3] = regs->r10; + sd.args[4] = regs->r8; + sd.args[5] = regs->r9; + } else +#endif + { + sd.args[0] = regs->bx; + sd.args[1] = regs->cx; + sd.args[2] = regs->dx; + sd.args[3] = regs->si; + sd.args[4] = regs->di; + sd.args[5] = regs->bp; + } + + BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0); + BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1); + + ret = seccomp_phase1(&sd); + if (ret == SECCOMP_PHASE1_SKIP) { + regs->orig_ax = -1; + ret = 0; + } else if (ret != SECCOMP_PHASE1_OK) { + return ret; /* Go directly to phase 2 */ + } + + work &= ~_TIF_SECCOMP; + } +#endif + + /* Do our best to finish without phase 2. */ + if (work == 0) + return ret; /* seccomp and/or nohz only (ret == 0 here) */ + +#ifdef CONFIG_AUDITSYSCALL + if (work == _TIF_SYSCALL_AUDIT) { + /* + * If there is no more work to be done except auditing, + * then audit in phase 1. Phase 2 always audits, so, if + * we audit here, then we can't go on to phase 2. + */ + do_audit_syscall_entry(regs, arch); + return 0; + } +#endif + + return 1; /* Something is enabled that we can't handle in phase 1 */ +} + +/* Returns the syscall nr to run (which should match regs->orig_ax). */ +long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, + unsigned long phase1_result) { long ret = 0; + u32 work = ACCESS_ONCE(current_thread_info()->flags) & + _TIF_WORK_SYSCALL_ENTRY; - user_exit(); + BUG_ON(regs != task_pt_regs(current)); /* * If we stepped into a sysenter/syscall insn, it trapped in @@ -1467,17 +1569,21 @@ long syscall_trace_enter(struct pt_regs *regs) * do_debug() and we need to set it again to restore the user * state. If we entered on the slow path, TF was already set. */ - if (test_thread_flag(TIF_SINGLESTEP)) + if (work & _TIF_SINGLESTEP) regs->flags |= X86_EFLAGS_TF; - /* do the secure computing check first */ - if (secure_computing(regs->orig_ax)) { +#ifdef CONFIG_SECCOMP + /* + * Call seccomp_phase2 before running the other hooks so that + * they can see any changes made by a seccomp tracer. + */ + if (phase1_result > 1 && seccomp_phase2(phase1_result)) { /* seccomp failures shouldn't expose any additional code. */ - ret = -1L; - goto out; + return -1; } +#endif - if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) + if (unlikely(work & _TIF_SYSCALL_EMU)) ret = -1L; if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && @@ -1487,23 +1593,22 @@ long syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->orig_ax); - if (IS_IA32) - audit_syscall_entry(AUDIT_ARCH_I386, - regs->orig_ax, - regs->bx, regs->cx, - regs->dx, regs->si); -#ifdef CONFIG_X86_64 - else - audit_syscall_entry(AUDIT_ARCH_X86_64, - regs->orig_ax, - regs->di, regs->si, - regs->dx, regs->r10); -#endif + do_audit_syscall_entry(regs, arch); -out: return ret ?: regs->orig_ax; } +long syscall_trace_enter(struct pt_regs *regs) +{ + u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; + unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch); + + if (phase1_result == 0) + return regs->orig_ax; + else + return syscall_trace_enter_phase2(regs, arch, phase1_result); +} + void syscall_trace_leave(struct pt_regs *regs) { bool step; diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index ff898bbf579d..176a0f99d4da 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -498,6 +498,24 @@ void force_hpet_resume(void) } /* + * According to the datasheet e6xx systems have the HPET hardwired to + * 0xfed00000 + */ +static void e6xx_force_enable_hpet(struct pci_dev *dev) +{ + if (hpet_address || force_hpet_address) + return; + + force_hpet_address = 0xFED00000; + force_hpet_resume_type = NONE_FORCE_HPET_RESUME; + dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " + "0x%lx\n", force_hpet_address); + return; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E6XX_CU, + e6xx_force_enable_hpet); + +/* * HPET MSI on some boards (ATI SB700/SB800) has side effect on * floppy DMA. Disable HPET MSI on such platforms. * See erratum #27 (Misinterpreted MSI Requests May Result in diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 41ead8d3bc0b..235cfd39e0d7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -879,6 +879,15 @@ void __init setup_arch(char **cmdline_p) KERNEL_PGD_PTRS); load_cr3(swapper_pg_dir); + /* + * Note: Quark X1000 CPUs advertise PGE incorrectly and require + * a cr3 based tlb flush, so the following __flush_tlb_all() + * will not flush anything because the cpu quirk which clears + * X86_FEATURE_PGE has not been invoked yet. Though due to the + * load_cr3() above the TLB has been flushed already. The + * quirk is invoked before subsequent calls to __flush_tlb_all() + * so proper operation is guaranteed. + */ __flush_tlb_all(); #else printk(KERN_INFO "Command line: %s\n", boot_command_line); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 2851d63c1202..ed37a768d0fc 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -675,6 +675,11 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) * handler too. */ regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF); + /* + * Ensure the signal handler starts with the new fpu state. + */ + if (used_math()) + drop_init_fpu(current); } signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP)); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 42a2dca984b3..2d5200e56357 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -102,6 +102,8 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +static DEFINE_PER_CPU(struct completion, die_complete); + atomic_t init_deasserted; /* @@ -111,7 +113,6 @@ atomic_t init_deasserted; static void smp_callin(void) { int cpuid, phys_id; - unsigned long timeout; /* * If waken up by an INIT in an 82489DX configuration @@ -130,37 +131,6 @@ static void smp_callin(void) * (This works even if the APIC is not enabled.) */ phys_id = read_apic_id(); - if (cpumask_test_cpu(cpuid, cpu_callin_mask)) { - panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, - phys_id, cpuid); - } - pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); - - /* - * STARTUP IPIs are fragile beasts as they might sometimes - * trigger some glue motherboard logic. Complete APIC bus - * silence for 1 second, this overestimates the time the - * boot CPU is spending to send the up to 2 STARTUP IPIs - * by a factor of two. This should be enough. - */ - - /* - * Waiting 2s total for startup (udelay is not yet working) - */ - timeout = jiffies + 2*HZ; - while (time_before(jiffies, timeout)) { - /* - * Has the boot CPU finished it's STARTUP sequence? - */ - if (cpumask_test_cpu(cpuid, cpu_callout_mask)) - break; - cpu_relax(); - } - - if (!time_before(jiffies, timeout)) { - panic("%s: CPU%d started up but did not get a callout!\n", - __func__, cpuid); - } /* * the boot CPU has finished the init stage and is spinning @@ -296,11 +266,19 @@ void smp_store_cpu_info(int id) } static bool +topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) +{ + int cpu1 = c->cpu_index, cpu2 = o->cpu_index; + + return (cpu_to_node(cpu1) == cpu_to_node(cpu2)); +} + +static bool topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name) { int cpu1 = c->cpu_index, cpu2 = o->cpu_index; - return !WARN_ONCE(cpu_to_node(cpu1) != cpu_to_node(cpu2), + return !WARN_ONCE(!topology_same_node(c, o), "sched: CPU #%d's %s-sibling CPU #%d is not on the same node! " "[node: %d != %d]. Ignoring dependency.\n", cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2)); @@ -341,17 +319,44 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) return false; } -static bool match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) +/* + * Unlike the other levels, we do not enforce keeping a + * multicore group inside a NUMA node. If this happens, we will + * discard the MC level of the topology later. + */ +static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { - if (c->phys_proc_id == o->phys_proc_id) { - if (cpu_has(c, X86_FEATURE_AMD_DCM)) - return true; - - return topology_sane(c, o, "mc"); - } + if (c->phys_proc_id == o->phys_proc_id) + return true; return false; } +static struct sched_domain_topology_level numa_inside_package_topology[] = { +#ifdef CONFIG_SCHED_SMT + { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, +#endif +#ifdef CONFIG_SCHED_MC + { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, +#endif + { NULL, }, +}; +/* + * set_sched_topology() sets the topology internal to a CPU. The + * NUMA topologies are layered on top of it to build the full + * system topology. + * + * If NUMA nodes are observed to occur within a CPU package, this + * function should be called. It forces the sched domain code to + * only use the SMT level for the CPU portion of the topology. + * This essentially falls back to relying on NUMA information + * from the SRAT table to describe the entire system topology + * (except for hyperthreads). + */ +static void primarily_use_numa_for_topology(void) +{ + set_sched_topology(numa_inside_package_topology); +} + void set_cpu_sibling_map(int cpu) { bool has_smt = smp_num_siblings > 1; @@ -388,7 +393,7 @@ void set_cpu_sibling_map(int cpu) for_each_cpu(i, cpu_sibling_setup_mask) { o = &cpu_data(i); - if ((i == cpu) || (has_mp && match_mc(c, o))) { + if ((i == cpu) || (has_mp && match_die(c, o))) { link_mask(core, cpu, i); /* @@ -410,6 +415,8 @@ void set_cpu_sibling_map(int cpu) } else if (i != cpu && !c->booted_cores) c->booted_cores = cpu_data(i).booted_cores; } + if (match_die(c, o) && !topology_same_node(c, o)) + primarily_use_numa_for_topology(); } } @@ -753,8 +760,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) unsigned long start_ip = real_mode_header->trampoline_start; unsigned long boot_error = 0; - int timeout; int cpu0_nmi_registered = 0; + unsigned long timeout; /* Just in case we booted with a single CPU. */ alternatives_enable_smp(); @@ -802,6 +809,15 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) } /* + * AP might wait on cpu_callout_mask in cpu_init() with + * cpu_initialized_mask set if previous attempt to online + * it timed-out. Clear cpu_initialized_mask so that after + * INIT/SIPI it could start with a clean state. + */ + cpumask_clear_cpu(cpu, cpu_initialized_mask); + smp_mb(); + + /* * Wake up a CPU in difference cases: * - Use the method in the APIC driver if it's defined * Otherwise, @@ -815,53 +831,38 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) if (!boot_error) { /* - * allow APs to start initializing. + * Wait 10s total for a response from AP */ - pr_debug("Before Callout %d\n", cpu); - cpumask_set_cpu(cpu, cpu_callout_mask); - pr_debug("After Callout %d\n", cpu); + boot_error = -1; + timeout = jiffies + 10*HZ; + while (time_before(jiffies, timeout)) { + if (cpumask_test_cpu(cpu, cpu_initialized_mask)) { + /* + * Tell AP to proceed with initialization + */ + cpumask_set_cpu(cpu, cpu_callout_mask); + boot_error = 0; + break; + } + udelay(100); + schedule(); + } + } + if (!boot_error) { /* - * Wait 5s total for a response + * Wait till AP completes initial initialization */ - for (timeout = 0; timeout < 50000; timeout++) { - if (cpumask_test_cpu(cpu, cpu_callin_mask)) - break; /* It has booted */ - udelay(100); + while (!cpumask_test_cpu(cpu, cpu_callin_mask)) { /* * Allow other tasks to run while we wait for the * AP to come online. This also gives a chance * for the MTRR work(triggered by the AP coming online) * to be completed in the stop machine context. */ + udelay(100); schedule(); } - - if (cpumask_test_cpu(cpu, cpu_callin_mask)) { - print_cpu_msr(&cpu_data(cpu)); - pr_debug("CPU%d: has booted.\n", cpu); - } else { - boot_error = 1; - if (*trampoline_status == 0xA5A5A5A5) - /* trampoline started but...? */ - pr_err("CPU%d: Stuck ??\n", cpu); - else - /* trampoline code not run */ - pr_err("CPU%d: Not responding\n", cpu); - if (apic->inquire_remote_apic) - apic->inquire_remote_apic(apicid); - } - } - - if (boot_error) { - /* Try to put things back the way they were before ... */ - numa_remove_cpu(cpu); /* was set by numa_add_cpu */ - - /* was set by do_boot_cpu() */ - cpumask_clear_cpu(cpu, cpu_callout_mask); - - /* was set by cpu_init() */ - cpumask_clear_cpu(cpu, cpu_initialized_mask); } /* mark "stuck" area as not stuck */ @@ -1326,26 +1327,24 @@ int native_cpu_disable(void) return ret; clear_local_APIC(); - + init_completion(&per_cpu(die_complete, smp_processor_id())); cpu_disable_common(); + return 0; } void native_cpu_die(unsigned int cpu) { /* We don't do anything here: idle task is faking death itself. */ - unsigned int i; + wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ); - for (i = 0; i < 10; i++) { - /* They ack this in play_dead by setting CPU_DEAD */ - if (per_cpu(cpu_state, cpu) == CPU_DEAD) { - if (system_state == SYSTEM_RUNNING) - pr_info("CPU %u is now offline\n", cpu); - return; - } - msleep(100); + /* They ack this in play_dead() by setting CPU_DEAD */ + if (per_cpu(cpu_state, cpu) == CPU_DEAD) { + if (system_state == SYSTEM_RUNNING) + pr_info("CPU %u is now offline\n", cpu); + } else { + pr_err("CPU %u didn't die...\n", cpu); } - pr_err("CPU %u didn't die...\n", cpu); } void play_dead_common(void) @@ -1357,6 +1356,7 @@ void play_dead_common(void) mb(); /* Ack it */ __this_cpu_write(cpu_state, CPU_DEAD); + complete(&per_cpu(die_complete, smp_processor_id())); /* * With physical CPU hotplug, we should halt the cpu diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index e1e1e80fc6a6..957779f4eb40 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -216,7 +216,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) */ regs->orig_ax = syscall_nr; regs->ax = -ENOSYS; - tmp = secure_computing(syscall_nr); + tmp = secure_computing(); if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { warn_bad_vsyscall(KERN_DEBUG, regs, "seccomp tried to change syscall nr or ip"); diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 940b142cc11f..4c540c4719d8 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -271,8 +271,6 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) return -1; - drop_init_fpu(tsk); /* trigger finit */ - return 0; } @@ -402,8 +400,11 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) set_used_math(); } - if (use_eager_fpu()) + if (use_eager_fpu()) { + preempt_disable(); math_state_restore(); + preempt_enable(); + } return err; } else { diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 4d4f96a27638..db92793b7e23 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -20,7 +20,6 @@ lib-y := delay.o misc.o cmdline.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o lib-y += memcpy_$(BITS).o -lib-$(CONFIG_SMP) += rwlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o @@ -39,7 +38,7 @@ endif else obj-y += iomap_copy_64.o lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o - lib-y += thunk_64.o clear_page_64.o copy_page_64.o + lib-y += clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o lib-y += copy_user_64.o copy_user_nocache_64.o lib-y += cmpxchg16b_emu.o diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S index 1e572c507d06..40a172541ee2 100644 --- a/arch/x86/lib/cmpxchg16b_emu.S +++ b/arch/x86/lib/cmpxchg16b_emu.S @@ -6,15 +6,8 @@ * */ #include <linux/linkage.h> -#include <asm/alternative-asm.h> -#include <asm/frame.h> #include <asm/dwarf2.h> - -#ifdef CONFIG_SMP -#define SEG_PREFIX %gs: -#else -#define SEG_PREFIX -#endif +#include <asm/percpu.h> .text @@ -39,24 +32,25 @@ CFI_STARTPROC # *atomic* on a single cpu (as provided by the this_cpu_xx class of # macros). # -this_cpu_cmpxchg16b_emu: - pushf + pushfq_cfi cli - cmpq SEG_PREFIX(%rsi), %rax - jne not_same - cmpq SEG_PREFIX 8(%rsi), %rdx - jne not_same + cmpq PER_CPU_VAR((%rsi)), %rax + jne .Lnot_same + cmpq PER_CPU_VAR(8(%rsi)), %rdx + jne .Lnot_same - movq %rbx, SEG_PREFIX(%rsi) - movq %rcx, SEG_PREFIX 8(%rsi) + movq %rbx, PER_CPU_VAR((%rsi)) + movq %rcx, PER_CPU_VAR(8(%rsi)) - popf + CFI_REMEMBER_STATE + popfq_cfi mov $1, %al ret - not_same: - popf + CFI_RESTORE_STATE +.Lnot_same: + popfq_cfi xor %al,%al ret diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S index 828cb710dec2..b4807fce5177 100644 --- a/arch/x86/lib/cmpxchg8b_emu.S +++ b/arch/x86/lib/cmpxchg8b_emu.S @@ -7,11 +7,8 @@ */ #include <linux/linkage.h> -#include <asm/alternative-asm.h> -#include <asm/frame.h> #include <asm/dwarf2.h> - .text /* @@ -30,27 +27,28 @@ CFI_STARTPROC # set the whole ZF thing (caller will just compare # eax:edx with the expected value) # -cmpxchg8b_emu: - pushfl + pushfl_cfi cli cmpl (%esi), %eax - jne not_same + jne .Lnot_same cmpl 4(%esi), %edx - jne half_same + jne .Lhalf_same movl %ebx, (%esi) movl %ecx, 4(%esi) - popfl + CFI_REMEMBER_STATE + popfl_cfi ret - not_same: + CFI_RESTORE_STATE +.Lnot_same: movl (%esi), %eax - half_same: +.Lhalf_same: movl 4(%esi), %edx - popfl + popfl_cfi ret CFI_ENDPROC diff --git a/arch/x86/lib/rwlock.S b/arch/x86/lib/rwlock.S deleted file mode 100644 index 1cad22139c88..000000000000 --- a/arch/x86/lib/rwlock.S +++ /dev/null @@ -1,44 +0,0 @@ -/* Slow paths of read/write spinlocks. */ - -#include <linux/linkage.h> -#include <asm/alternative-asm.h> -#include <asm/frame.h> -#include <asm/rwlock.h> - -#ifdef CONFIG_X86_32 -# define __lock_ptr eax -#else -# define __lock_ptr rdi -#endif - -ENTRY(__write_lock_failed) - CFI_STARTPROC - FRAME -0: LOCK_PREFIX - WRITE_LOCK_ADD($RW_LOCK_BIAS) (%__lock_ptr) -1: rep; nop - cmpl $WRITE_LOCK_CMP, (%__lock_ptr) - jne 1b - LOCK_PREFIX - WRITE_LOCK_SUB($RW_LOCK_BIAS) (%__lock_ptr) - jnz 0b - ENDFRAME - ret - CFI_ENDPROC -END(__write_lock_failed) - -ENTRY(__read_lock_failed) - CFI_STARTPROC - FRAME -0: LOCK_PREFIX - READ_LOCK_SIZE(inc) (%__lock_ptr) -1: rep; nop - READ_LOCK_SIZE(cmp) $1, (%__lock_ptr) - js 1b - LOCK_PREFIX - READ_LOCK_SIZE(dec) (%__lock_ptr) - js 0b - ENDFRAME - ret - CFI_ENDPROC -END(__read_lock_failed) diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index 28f85c916712..e28cdaf5ac2c 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S @@ -6,25 +6,46 @@ */ #include <linux/linkage.h> #include <asm/asm.h> + #include <asm/dwarf2.h> -#ifdef CONFIG_TRACE_IRQFLAGS /* put return address in eax (arg1) */ - .macro thunk_ra name,func + .macro THUNK name, func, put_ret_addr_in_eax=0 .globl \name \name: - pushl %eax - pushl %ecx - pushl %edx + CFI_STARTPROC + pushl_cfi %eax + CFI_REL_OFFSET eax, 0 + pushl_cfi %ecx + CFI_REL_OFFSET ecx, 0 + pushl_cfi %edx + CFI_REL_OFFSET edx, 0 + + .if \put_ret_addr_in_eax /* Place EIP in the arg1 */ movl 3*4(%esp), %eax + .endif + call \func - popl %edx - popl %ecx - popl %eax + popl_cfi %edx + CFI_RESTORE edx + popl_cfi %ecx + CFI_RESTORE ecx + popl_cfi %eax + CFI_RESTORE eax ret + CFI_ENDPROC _ASM_NOKPROBE(\name) .endm - thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller - thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller +#ifdef CONFIG_TRACE_IRQFLAGS + THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1 + THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1 +#endif + +#ifdef CONFIG_PREEMPT + THUNK ___preempt_schedule, preempt_schedule +#ifdef CONFIG_CONTEXT_TRACKING + THUNK ___preempt_schedule_context, preempt_schedule_context #endif +#endif + diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index 92d9feaff42b..b30b5ebd614a 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -38,6 +38,13 @@ THUNK lockdep_sys_exit_thunk,lockdep_sys_exit #endif +#ifdef CONFIG_PREEMPT + THUNK ___preempt_schedule, preempt_schedule +#ifdef CONFIG_CONTEXT_TRACKING + THUNK ___preempt_schedule_context, preempt_schedule_context +#endif +#endif + /* SAVE_ARGS below is used only for the .cfi directives it contains. */ CFI_STARTPROC SAVE_ARGS diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a24194681513..d973e61e450d 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -3,7 +3,6 @@ * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar */ -#include <linux/magic.h> /* STACK_END_MAGIC */ #include <linux/sched.h> /* test_thread_flag(), ... */ #include <linux/kdebug.h> /* oops_begin/end, ... */ #include <linux/module.h> /* search_exception_table */ @@ -350,7 +349,7 @@ out: void vmalloc_sync_all(void) { - sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); + sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END, 0); } /* @@ -649,7 +648,6 @@ no_context(struct pt_regs *regs, unsigned long error_code, unsigned long address, int signal, int si_code) { struct task_struct *tsk = current; - unsigned long *stackend; unsigned long flags; int sig; @@ -709,8 +707,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, show_fault_oops(regs, error_code, address); - stackend = end_of_stack(tsk); - if (tsk != &init_task && *stackend != STACK_END_MAGIC) + if (task_stack_end_corrupted(tsk)) printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); tsk->thread.cr2 = address; @@ -933,8 +930,17 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) * cross-processor TLB flush, even if no stale TLB entries exist * on other processors. * + * Spurious faults may only occur if the TLB contains an entry with + * fewer permission than the page table entry. Non-present (P = 0) + * and reserved bit (R = 1) faults are never spurious. + * * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. + * + * Returns non-zero if a spurious fault was handled, zero otherwise. + * + * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3 + * (Optional Invalidation). */ static noinline int spurious_fault(unsigned long error_code, unsigned long address) @@ -945,8 +951,17 @@ spurious_fault(unsigned long error_code, unsigned long address) pte_t *pte; int ret; - /* Reserved-bit violation or user access to kernel space? */ - if (error_code & (PF_USER | PF_RSVD)) + /* + * Only writes to RO or instruction fetches from NX may cause + * spurious faults. + * + * These could be from user or supervisor accesses but the TLB + * is only lazily flushed after a kernel mapping protection + * change, so user accesses are not expected to cause spurious + * faults. + */ + if (error_code != (PF_WRITE | PF_PROT) + && error_code != (PF_INSTR | PF_PROT)) return 0; pgd = init_mm.pgd + pgd_index(address); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 7d05565ba781..c8140e12816a 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -537,7 +537,7 @@ static void __init pagetable_init(void) permanent_kmaps_init(pgd_base); } -pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); +pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL); EXPORT_SYMBOL_GPL(__supported_pte_mask); /* user-defined highmem size */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5621c47d7a1a..4cb8763868fc 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -151,7 +151,7 @@ early_param("gbpages", parse_direct_gbpages_on); * around without checking the pgd every time. */ -pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; +pteval_t __supported_pte_mask __read_mostly = ~0; EXPORT_SYMBOL_GPL(__supported_pte_mask); int force_personality32; @@ -178,7 +178,7 @@ __setup("noexec32=", nonx32_setup); * When memory was added/removed make sure all the processes MM have * suitable PGD entries in the local PGD level page. */ -void sync_global_pgds(unsigned long start, unsigned long end) +void sync_global_pgds(unsigned long start, unsigned long end, int removed) { unsigned long address; @@ -186,7 +186,12 @@ void sync_global_pgds(unsigned long start, unsigned long end) const pgd_t *pgd_ref = pgd_offset_k(address); struct page *page; - if (pgd_none(*pgd_ref)) + /* + * When it is called after memory hot remove, pgd_none() + * returns true. In this case (removed == 1), we must clear + * the PGD entries in the local PGD level page. + */ + if (pgd_none(*pgd_ref) && !removed) continue; spin_lock(&pgd_lock); @@ -199,12 +204,18 @@ void sync_global_pgds(unsigned long start, unsigned long end) pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); - if (pgd_none(*pgd)) - set_pgd(pgd, *pgd_ref); - else + if (!pgd_none(*pgd_ref) && !pgd_none(*pgd)) BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + if (removed) { + if (pgd_none(*pgd_ref) && !pgd_none(*pgd)) + pgd_clear(pgd); + } else { + if (pgd_none(*pgd)) + set_pgd(pgd, *pgd_ref); + } + spin_unlock(pgt_lock); } spin_unlock(&pgd_lock); @@ -633,7 +644,7 @@ kernel_physical_mapping_init(unsigned long start, } if (pgd_changed) - sync_global_pgds(addr, end - 1); + sync_global_pgds(addr, end - 1, 0); __flush_tlb_all(); @@ -976,25 +987,26 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end, bool direct) { unsigned long next; + unsigned long addr; pgd_t *pgd; pud_t *pud; bool pgd_changed = false; - for (; start < end; start = next) { - next = pgd_addr_end(start, end); + for (addr = start; addr < end; addr = next) { + next = pgd_addr_end(addr, end); - pgd = pgd_offset_k(start); + pgd = pgd_offset_k(addr); if (!pgd_present(*pgd)) continue; pud = (pud_t *)pgd_page_vaddr(*pgd); - remove_pud_table(pud, start, next, direct); + remove_pud_table(pud, addr, next, direct); if (free_pud_table(pud, pgd)) pgd_changed = true; } if (pgd_changed) - sync_global_pgds(start, end - 1); + sync_global_pgds(start, end - 1, 1); flush_tlb_all(); } @@ -1341,7 +1353,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) else err = vmemmap_populate_basepages(start, end, node); if (!err) - sync_global_pgds(start, end - 1); + sync_global_pgds(start, end - 1, 0); return err; } diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index a32b706c401a..d221374d5ce8 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -185,8 +185,8 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) return numa_add_memblk_to(nid, start, end, &numa_meminfo); } -/* Initialize NODE_DATA for a node on the local memory */ -static void __init setup_node_data(int nid, u64 start, u64 end) +/* Allocate NODE_DATA for a node on the local memory */ +static void __init alloc_node_data(int nid) { const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); u64 nd_pa; @@ -194,18 +194,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end) int tnid; /* - * Don't confuse VM with a node that doesn't have the - * minimum amount of memory: - */ - if (end && (end - start) < NODE_MIN_SIZE) - return; - - start = roundup(start, ZONE_ALIGN); - - printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", - nid, start, end - 1); - - /* * Allocate node data. Try node-local memory and then any node. * Never allocate in DMA zone. */ @@ -222,7 +210,7 @@ static void __init setup_node_data(int nid, u64 start, u64 end) nd = __va(nd_pa); /* report and initialize */ - printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]\n", + printk(KERN_INFO "NODE_DATA(%d) allocated [mem %#010Lx-%#010Lx]\n", nid, nd_pa, nd_pa + nd_size - 1); tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); if (tnid != nid) @@ -230,9 +218,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end) node_data[nid] = nd; memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); - NODE_DATA(nid)->node_id = nid; - NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT; - NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT; node_set_online(nid); } @@ -523,8 +508,17 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) end = max(mi->blk[i].end, end); } - if (start < end) - setup_node_data(nid, start, end); + if (start >= end) + continue; + + /* + * Don't confuse VM with a node that doesn't have the + * minimum amount of memory: + */ + if (end && (end - start) < NODE_MIN_SIZE) + continue; + + alloc_node_data(nid); } /* Dump memblock with node info and return. */ diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 4dd8cf652579..75cc0978d45d 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -59,41 +59,6 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) __flush_tlb_one(vaddr); } -/* - * Associate a large virtual page frame with a given physical page frame - * and protection flags for that frame. pfn is for the base of the page, - * vaddr is what the page gets mapped to - both must be properly aligned. - * The pmd must already be instantiated. Assumes PAE mode. - */ -void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - - if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */ - printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n"); - return; /* BUG(); */ - } - if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */ - printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n"); - return; /* BUG(); */ - } - pgd = swapper_pg_dir + pgd_index(vaddr); - if (pgd_none(*pgd)) { - printk(KERN_WARNING "set_pmd_pfn: pgd_none\n"); - return; /* BUG(); */ - } - pud = pud_offset(pgd, vaddr); - pmd = pmd_offset(pud, vaddr); - set_pmd(pmd, pfn_pmd(pfn, flags)); - /* - * It's enough to flush this one mapping. - * (PGE mappings get flushed as well) - */ - __flush_tlb_one(vaddr); -} - unsigned long __FIXADDR_TOP = 0xfffff000; EXPORT_SYMBOL(__FIXADDR_TOP); diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 2ae525e0d8ba..37c1435889ce 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -442,8 +442,6 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, */ prot |= _PAGE_CACHE_UC_MINUS; - prot |= _PAGE_IOMAP; /* creating a mapping for IO */ - vma->vm_page_prot = __pgprot(prot); if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index bbb1d2259ecf..a5efb21d5228 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -695,7 +695,7 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, * */ static int per_cpu_shndx = -1; -Elf_Addr per_cpu_load_addr; +static Elf_Addr per_cpu_load_addr; static void percpu_init(void) { diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h index fd57829b30d8..0224987556ce 100644 --- a/arch/x86/vdso/vdso2c.h +++ b/arch/x86/vdso/vdso2c.h @@ -109,16 +109,18 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, /* Validate mapping addresses. */ for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) { - if (!syms[i]) + INT_BITS symval = syms[special_pages[i]]; + + if (!symval) continue; /* The mapping isn't used; ignore it. */ - if (syms[i] % 4096) + if (symval % 4096) fail("%s must be a multiple of 4096\n", required_syms[i].name); - if (syms[sym_vvar_start] > syms[i] + 4096) - fail("%s underruns begin_vvar\n", + if (symval + 4096 < syms[sym_vvar_start]) + fail("%s underruns vvar_start\n", required_syms[i].name); - if (syms[i] + 4096 > 0) + if (symval + 4096 > 0) fail("%s is on the wrong side of the vdso text\n", required_syms[i].name); } diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index a02e09e18f57..be14cc3e48d5 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -15,12 +15,14 @@ * with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/bitops.h> #include <linux/efi.h> #include <linux/init.h> #include <linux/string.h> #include <xen/xen-ops.h> +#include <asm/page.h> #include <asm/setup.h> void __init xen_efi_init(void) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c0cb11fb5008..acb0effd8077 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1463,6 +1463,7 @@ static void __ref xen_setup_gdt(int cpu) pv_cpu_ops.load_gdt = xen_load_gdt; } +#ifdef CONFIG_XEN_PVH /* * A PV guest starts with default flags that are not set for PVH, set them * here asap. @@ -1508,17 +1509,21 @@ static void __init xen_pvh_early_guest_init(void) return; xen_have_vector_callback = 1; + + xen_pvh_early_cpu_init(0, false); xen_pvh_set_cr_flags(0); #ifdef CONFIG_X86_32 BUG(); /* PVH: Implement proper support. */ #endif } +#endif /* CONFIG_XEN_PVH */ /* First C function to be called on Xen boot */ asmlinkage __visible void __init xen_start_kernel(void) { struct physdev_set_iopl set_iopl; + unsigned long initrd_start = 0; int rc; if (!xen_start_info) @@ -1527,7 +1532,9 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_domain_type = XEN_PV_DOMAIN; xen_setup_features(); +#ifdef CONFIG_XEN_PVH xen_pvh_early_guest_init(); +#endif xen_setup_machphys_mapping(); /* Install Xen paravirt ops */ @@ -1559,8 +1566,6 @@ asmlinkage __visible void __init xen_start_kernel(void) #endif __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); - __supported_pte_mask |= _PAGE_IOMAP; - /* * Prevent page tables from being allocated in highmem, even * if CONFIG_HIGHPTE is enabled. @@ -1667,10 +1672,16 @@ asmlinkage __visible void __init xen_start_kernel(void) new_cpu_data.x86_capability[0] = cpuid_edx(1); #endif + if (xen_start_info->mod_start) { + if (xen_start_info->flags & SIF_MOD_START_PFN) + initrd_start = PFN_PHYS(xen_start_info->mod_start); + else + initrd_start = __pa(xen_start_info->mod_start); + } + /* Poke various useful things into boot_params */ boot_params.hdr.type_of_loader = (9 << 4) | 0; - boot_params.hdr.ramdisk_image = xen_start_info->mod_start - ? __pa(xen_start_info->mod_start) : 0; + boot_params.hdr.ramdisk_image = initrd_start; boot_params.hdr.ramdisk_size = xen_start_info->mod_len; boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 16fb0099b7f2..f62af7647ec9 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -399,38 +399,14 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) if (unlikely(mfn == INVALID_P2M_ENTRY)) { mfn = 0; flags = 0; - } else { - /* - * Paramount to do this test _after_ the - * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY & - * IDENTITY_FRAME_BIT resolves to true. - */ - mfn &= ~FOREIGN_FRAME_BIT; - if (mfn & IDENTITY_FRAME_BIT) { - mfn &= ~IDENTITY_FRAME_BIT; - flags |= _PAGE_IOMAP; - } - } + } else + mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT); val = ((pteval_t)mfn << PAGE_SHIFT) | flags; } return val; } -static pteval_t iomap_pte(pteval_t val) -{ - if (val & _PAGE_PRESENT) { - unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; - pteval_t flags = val & PTE_FLAGS_MASK; - - /* We assume the pte frame number is a MFN, so - just use it as-is. */ - val = ((pteval_t)pfn << PAGE_SHIFT) | flags; - } - - return val; -} - __visible pteval_t xen_pte_val(pte_t pte) { pteval_t pteval = pte.pte; @@ -441,9 +417,6 @@ __visible pteval_t xen_pte_val(pte_t pte) pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT; } #endif - if (xen_initial_domain() && (pteval & _PAGE_IOMAP)) - return pteval; - return pte_mfn_to_pfn(pteval); } PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); @@ -481,7 +454,6 @@ void xen_set_pat(u64 pat) __visible pte_t xen_make_pte(pteval_t pte) { - phys_addr_t addr = (pte & PTE_PFN_MASK); #if 0 /* If Linux is trying to set a WC pte, then map to the Xen WC. * If _PAGE_PAT is set, then it probably means it is really @@ -496,19 +468,7 @@ __visible pte_t xen_make_pte(pteval_t pte) pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT; } #endif - /* - * Unprivileged domains are allowed to do IOMAPpings for - * PCI passthrough, but not map ISA space. The ISA - * mappings are just dummy local mappings to keep other - * parts of the kernel happy. - */ - if (unlikely(pte & _PAGE_IOMAP) && - (xen_initial_domain() || addr >= ISA_END_ADDRESS)) { - pte = iomap_pte(pte); - } else { - pte &= ~_PAGE_IOMAP; - pte = pte_pfn_to_mfn(pte); - } + pte = pte_pfn_to_mfn(pte); return native_make_pte(pte); } @@ -2091,7 +2051,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) default: /* By default, set_fixmap is used for hardware mappings */ - pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP)); + pte = mfn_pte(phys, prot); break; } diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 3172692381ae..9f5983b01ed9 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -173,6 +173,7 @@ #include <xen/balloon.h> #include <xen/grant_table.h> +#include "p2m.h" #include "multicalls.h" #include "xen-ops.h" @@ -180,12 +181,6 @@ static void __init m2p_override_init(void); unsigned long xen_max_p2m_pfn __read_mostly; -#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) -#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *)) -#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **)) - -#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE) - /* Placeholders for holes in the address space */ static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); @@ -202,16 +197,12 @@ static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE); RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); -/* We might hit two boundary violations at the start and end, at max each - * boundary violation will require three middle nodes. */ -RESERVE_BRK(p2m_mid_extra, PAGE_SIZE * 2 * 3); - -/* When we populate back during bootup, the amount of pages can vary. The - * max we have is seen is 395979, but that does not mean it can't be more. - * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle - * it can re-use Xen provided mfn_list array, so we only need to allocate at - * most three P2M top nodes. */ -RESERVE_BRK(p2m_populated, PAGE_SIZE * 3); +/* For each I/O range remapped we may lose up to two leaf pages for the boundary + * violations and three mid pages to cover up to 3GB. With + * early_can_reuse_p2m_middle() most of the leaf pages will be reused by the + * remapped region. + */ +RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES); static inline unsigned p2m_top_index(unsigned long pfn) { diff --git a/arch/x86/xen/p2m.h b/arch/x86/xen/p2m.h new file mode 100644 index 000000000000..ad8aee24ab72 --- /dev/null +++ b/arch/x86/xen/p2m.h @@ -0,0 +1,15 @@ +#ifndef _XEN_P2M_H +#define _XEN_P2M_H + +#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) +#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *)) +#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **)) + +#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE) + +#define MAX_REMAP_RANGES 10 + +extern unsigned long __init set_phys_range_identity(unsigned long pfn_s, + unsigned long pfn_e); + +#endif /* _XEN_P2M_H */ diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 2e555163c2fe..af7216128d93 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -29,6 +29,7 @@ #include <xen/features.h> #include "xen-ops.h" #include "vdso.h" +#include "p2m.h" /* These are code, but not functions. Defined in entry.S */ extern const char xen_hypervisor_callback[]; @@ -46,6 +47,9 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; /* Number of pages released from the initial allocation. */ unsigned long xen_released_pages; +/* Buffer used to remap identity mapped pages */ +unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata; + /* * The maximum amount of extra memory compared to the base size. The * main scaling factor is the size of struct page. At extreme ratios @@ -151,107 +155,325 @@ static unsigned long __init xen_do_chunk(unsigned long start, return len; } -static unsigned long __init xen_release_chunk(unsigned long start, - unsigned long end) -{ - return xen_do_chunk(start, end, true); -} - -static unsigned long __init xen_populate_chunk( +/* + * Finds the next RAM pfn available in the E820 map after min_pfn. + * This function updates min_pfn with the pfn found and returns + * the size of that range or zero if not found. + */ +static unsigned long __init xen_find_pfn_range( const struct e820entry *list, size_t map_size, - unsigned long max_pfn, unsigned long *last_pfn, - unsigned long credits_left) + unsigned long *min_pfn) { const struct e820entry *entry; unsigned int i; unsigned long done = 0; - unsigned long dest_pfn; for (i = 0, entry = list; i < map_size; i++, entry++) { unsigned long s_pfn; unsigned long e_pfn; - unsigned long pfns; - long capacity; - - if (credits_left <= 0) - break; if (entry->type != E820_RAM) continue; e_pfn = PFN_DOWN(entry->addr + entry->size); - /* We only care about E820 after the xen_start_info->nr_pages */ - if (e_pfn <= max_pfn) + /* We only care about E820 after this */ + if (e_pfn < *min_pfn) continue; s_pfn = PFN_UP(entry->addr); - /* If the E820 falls within the nr_pages, we want to start - * at the nr_pages PFN. - * If that would mean going past the E820 entry, skip it + + /* If min_pfn falls within the E820 entry, we want to start + * at the min_pfn PFN. */ - if (s_pfn <= max_pfn) { - capacity = e_pfn - max_pfn; - dest_pfn = max_pfn; + if (s_pfn <= *min_pfn) { + done = e_pfn - *min_pfn; } else { - capacity = e_pfn - s_pfn; - dest_pfn = s_pfn; + done = e_pfn - s_pfn; + *min_pfn = s_pfn; } + break; + } - if (credits_left < capacity) - capacity = credits_left; + return done; +} - pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false); - done += pfns; - *last_pfn = (dest_pfn + pfns); - if (pfns < capacity) - break; - credits_left -= pfns; +/* + * This releases a chunk of memory and then does the identity map. It's used as + * as a fallback if the remapping fails. + */ +static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, + unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity, + unsigned long *released) +{ + WARN_ON(start_pfn > end_pfn); + + /* Need to release pages first */ + *released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true); + *identity += set_phys_range_identity(start_pfn, end_pfn); +} + +/* + * Helper function to update both the p2m and m2p tables. + */ +static unsigned long __init xen_update_mem_tables(unsigned long pfn, + unsigned long mfn) +{ + struct mmu_update update = { + .ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, + .val = pfn + }; + + /* Update p2m */ + if (!early_set_phys_to_machine(pfn, mfn)) { + WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n", + pfn, mfn); + return false; } - return done; + + /* Update m2p */ + if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) { + WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n", + mfn, pfn); + return false; + } + + return true; } -static void __init xen_set_identity_and_release_chunk( - unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, - unsigned long *released, unsigned long *identity) +/* + * This function updates the p2m and m2p tables with an identity map from + * start_pfn to start_pfn+size and remaps the underlying RAM of the original + * allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks + * to not exhaust the reserved brk space. Doing it in properly aligned blocks + * ensures we only allocate the minimum required leaf pages in the p2m table. It + * copies the existing mfns from the p2m table under the 1:1 map, overwrites + * them with the identity map and then updates the p2m and m2p tables with the + * remapped memory. + */ +static unsigned long __init xen_do_set_identity_and_remap_chunk( + unsigned long start_pfn, unsigned long size, unsigned long remap_pfn) { - unsigned long pfn; + unsigned long ident_pfn_iter, remap_pfn_iter; + unsigned long ident_start_pfn_align, remap_start_pfn_align; + unsigned long ident_end_pfn_align, remap_end_pfn_align; + unsigned long ident_boundary_pfn, remap_boundary_pfn; + unsigned long ident_cnt = 0; + unsigned long remap_cnt = 0; + unsigned long left = size; + unsigned long mod; + int i; + + WARN_ON(size == 0); + + BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); /* - * If the PFNs are currently mapped, clear the mappings - * (except for the ISA region which must be 1:1 mapped) to - * release the refcounts (in Xen) on the original frames. + * Determine the proper alignment to remap memory in P2M_PER_PAGE sized + * blocks. We need to keep track of both the existing pfn mapping and + * the new pfn remapping. */ - for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { - pte_t pte = __pte_ma(0); + mod = start_pfn % P2M_PER_PAGE; + ident_start_pfn_align = + mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn; + mod = remap_pfn % P2M_PER_PAGE; + remap_start_pfn_align = + mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn; + mod = (start_pfn + size) % P2M_PER_PAGE; + ident_end_pfn_align = start_pfn + size - mod; + mod = (remap_pfn + size) % P2M_PER_PAGE; + remap_end_pfn_align = remap_pfn + size - mod; + + /* Iterate over each p2m leaf node in each range */ + for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align; + ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align; + ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) { + /* Check we aren't past the end */ + BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size); + BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size); + + /* Save p2m mappings */ + for (i = 0; i < P2M_PER_PAGE; i++) + xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i); + + /* Set identity map which will free a p2m leaf */ + ident_cnt += set_phys_range_identity(ident_pfn_iter, + ident_pfn_iter + P2M_PER_PAGE); + +#ifdef DEBUG + /* Helps verify a p2m leaf has been freed */ + for (i = 0; i < P2M_PER_PAGE; i++) { + unsigned int pfn = ident_pfn_iter + i; + BUG_ON(pfn_to_mfn(pfn) != pfn); + } +#endif + /* Now remap memory */ + for (i = 0; i < P2M_PER_PAGE; i++) { + unsigned long mfn = xen_remap_buf[i]; + + /* This will use the p2m leaf freed above */ + if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) { + WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n", + remap_pfn_iter + i, mfn); + return 0; + } + + remap_cnt++; + } - if (pfn < PFN_UP(ISA_END_ADDRESS)) - pte = mfn_pte(pfn, PAGE_KERNEL_IO); + left -= P2M_PER_PAGE; + } - (void)HYPERVISOR_update_va_mapping( - (unsigned long)__va(pfn << PAGE_SHIFT), pte, 0); + /* Max boundary space possible */ + BUG_ON(left > (P2M_PER_PAGE - 1) * 2); + + /* Now handle the boundary conditions */ + ident_boundary_pfn = start_pfn; + remap_boundary_pfn = remap_pfn; + for (i = 0; i < left; i++) { + unsigned long mfn; + + /* These two checks move from the start to end boundaries */ + if (ident_boundary_pfn == ident_start_pfn_align) + ident_boundary_pfn = ident_pfn_iter; + if (remap_boundary_pfn == remap_start_pfn_align) + remap_boundary_pfn = remap_pfn_iter; + + /* Check we aren't past the end */ + BUG_ON(ident_boundary_pfn >= start_pfn + size); + BUG_ON(remap_boundary_pfn >= remap_pfn + size); + + mfn = pfn_to_mfn(ident_boundary_pfn); + + if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) { + WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n", + remap_pfn_iter + i, mfn); + return 0; + } + remap_cnt++; + + ident_boundary_pfn++; + remap_boundary_pfn++; } - if (start_pfn < nr_pages) - *released += xen_release_chunk( - start_pfn, min(end_pfn, nr_pages)); + /* Finish up the identity map */ + if (ident_start_pfn_align >= ident_end_pfn_align) { + /* + * In this case we have an identity range which does not span an + * aligned block so everything needs to be identity mapped here. + * If we didn't check this we might remap too many pages since + * the align boundaries are not meaningful in this case. + */ + ident_cnt += set_phys_range_identity(start_pfn, + start_pfn + size); + } else { + /* Remapped above so check each end of the chunk */ + if (start_pfn < ident_start_pfn_align) + ident_cnt += set_phys_range_identity(start_pfn, + ident_start_pfn_align); + if (start_pfn + size > ident_pfn_iter) + ident_cnt += set_phys_range_identity(ident_pfn_iter, + start_pfn + size); + } - *identity += set_phys_range_identity(start_pfn, end_pfn); + BUG_ON(ident_cnt != size); + BUG_ON(remap_cnt != size); + + return size; } -static unsigned long __init xen_set_identity_and_release( - const struct e820entry *list, size_t map_size, unsigned long nr_pages) +/* + * This function takes a contiguous pfn range that needs to be identity mapped + * and: + * + * 1) Finds a new range of pfns to use to remap based on E820 and remap_pfn. + * 2) Calls the do_ function to actually do the mapping/remapping work. + * + * The goal is to not allocate additional memory but to remap the existing + * pages. In the case of an error the underlying memory is simply released back + * to Xen and not remapped. + */ +static unsigned long __init xen_set_identity_and_remap_chunk( + const struct e820entry *list, size_t map_size, unsigned long start_pfn, + unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn, + unsigned long *identity, unsigned long *remapped, + unsigned long *released) +{ + unsigned long pfn; + unsigned long i = 0; + unsigned long n = end_pfn - start_pfn; + + while (i < n) { + unsigned long cur_pfn = start_pfn + i; + unsigned long left = n - i; + unsigned long size = left; + unsigned long remap_range_size; + + /* Do not remap pages beyond the current allocation */ + if (cur_pfn >= nr_pages) { + /* Identity map remaining pages */ + *identity += set_phys_range_identity(cur_pfn, + cur_pfn + size); + break; + } + if (cur_pfn + size > nr_pages) + size = nr_pages - cur_pfn; + + remap_range_size = xen_find_pfn_range(list, map_size, + &remap_pfn); + if (!remap_range_size) { + pr_warning("Unable to find available pfn range, not remapping identity pages\n"); + xen_set_identity_and_release_chunk(cur_pfn, + cur_pfn + left, nr_pages, identity, released); + break; + } + /* Adjust size to fit in current e820 RAM region */ + if (size > remap_range_size) + size = remap_range_size; + + if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) { + WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n", + cur_pfn, size, remap_pfn); + xen_set_identity_and_release_chunk(cur_pfn, + cur_pfn + left, nr_pages, identity, released); + break; + } + + /* Update variables to reflect new mappings. */ + i += size; + remap_pfn += size; + *identity += size; + *remapped += size; + } + + /* + * If the PFNs are currently mapped, the VA mapping also needs + * to be updated to be 1:1. + */ + for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) + (void)HYPERVISOR_update_va_mapping( + (unsigned long)__va(pfn << PAGE_SHIFT), + mfn_pte(pfn, PAGE_KERNEL_IO), 0); + + return remap_pfn; +} + +static unsigned long __init xen_set_identity_and_remap( + const struct e820entry *list, size_t map_size, unsigned long nr_pages, + unsigned long *released) { phys_addr_t start = 0; - unsigned long released = 0; unsigned long identity = 0; + unsigned long remapped = 0; + unsigned long last_pfn = nr_pages; const struct e820entry *entry; + unsigned long num_released = 0; int i; /* * Combine non-RAM regions and gaps until a RAM region (or the * end of the map) is reached, then set the 1:1 map and - * release the pages (if available) in those non-RAM regions. + * remap the memory in those non-RAM regions. * * The combined non-RAM regions are rounded to a whole number * of pages so any partial pages are accessible via the 1:1 @@ -269,22 +491,24 @@ static unsigned long __init xen_set_identity_and_release( end_pfn = PFN_UP(entry->addr); if (start_pfn < end_pfn) - xen_set_identity_and_release_chunk( - start_pfn, end_pfn, nr_pages, - &released, &identity); - + last_pfn = xen_set_identity_and_remap_chunk( + list, map_size, start_pfn, + end_pfn, nr_pages, last_pfn, + &identity, &remapped, + &num_released); start = end; } } - if (released) - printk(KERN_INFO "Released %lu pages of unused memory\n", released); - if (identity) - printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity); + *released = num_released; - return released; -} + pr_info("Set %ld page(s) to 1-1 mapping\n", identity); + pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped, + last_pfn); + pr_info("Released %ld page(s)\n", num_released); + return last_pfn; +} static unsigned long __init xen_get_max_pages(void) { unsigned long max_pages = MAX_DOMAIN_PAGES; @@ -347,7 +571,6 @@ char * __init xen_memory_setup(void) unsigned long max_pages; unsigned long last_pfn = 0; unsigned long extra_pages = 0; - unsigned long populated; int i; int op; @@ -392,20 +615,11 @@ char * __init xen_memory_setup(void) extra_pages += max_pages - max_pfn; /* - * Set P2M for all non-RAM pages and E820 gaps to be identity - * type PFNs. Any RAM pages that would be made inaccesible by - * this are first released. + * Set identity map on non-RAM pages and remap the underlying RAM. */ - xen_released_pages = xen_set_identity_and_release( - map, memmap.nr_entries, max_pfn); - - /* - * Populate back the non-RAM pages and E820 gaps that had been - * released. */ - populated = xen_populate_chunk(map, memmap.nr_entries, - max_pfn, &last_pfn, xen_released_pages); + last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn, + &xen_released_pages); - xen_released_pages -= populated; extra_pages += xen_released_pages; if (last_pfn > max_pfn) { diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 7005974c3ff3..8650cdb53209 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -37,6 +37,7 @@ #include <xen/hvc-console.h> #include "xen-ops.h" #include "mmu.h" +#include "smp.h" cpumask_var_t xen_cpu_initialized_map; @@ -99,10 +100,14 @@ static void cpu_bringup(void) wmb(); /* make sure everything is out */ } -/* Note: cpu parameter is only relevant for PVH */ -static void cpu_bringup_and_idle(int cpu) +/* + * Note: cpu parameter is only relevant for PVH. The reason for passing it + * is we can't do smp_processor_id until the percpu segments are loaded, for + * which we need the cpu number! So we pass it in rdi as first parameter. + */ +asmlinkage __visible void cpu_bringup_and_idle(int cpu) { -#ifdef CONFIG_X86_64 +#ifdef CONFIG_XEN_PVH if (xen_feature(XENFEAT_auto_translated_physmap) && xen_feature(XENFEAT_supervisor_mode_kernel)) xen_pvh_secondary_vcpu_init(cpu); @@ -360,6 +365,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) struct desc_struct *gdt; unsigned long gdt_mfn; + /* used to tell cpu_init() that it can proceed with initialization */ + cpumask_set_cpu(cpu, cpu_callout_mask); if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) return 0; @@ -374,11 +381,10 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->user_regs.fs = __KERNEL_PERCPU; ctxt->user_regs.gs = __KERNEL_STACK_CANARY; #endif - ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; - memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); if (!xen_feature(XENFEAT_auto_translated_physmap)) { + ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; ctxt->flags = VGCF_IN_KERNEL; ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ ctxt->user_regs.ds = __USER_DS; @@ -413,15 +419,18 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) (unsigned long)xen_failsafe_callback; ctxt->user_regs.cs = __KERNEL_CS; per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); -#ifdef CONFIG_X86_32 } -#else - } else - /* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with - * %rdi having the cpu number - which means are passing in - * as the first parameter the cpu. Subtle! +#ifdef CONFIG_XEN_PVH + else { + /* + * The vcpu comes on kernel page tables which have the NX pte + * bit set. This means before DS/SS is touched, NX in + * EFER must be set. Hence the following assembly glue code. */ + ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init; ctxt->user_regs.rdi = cpu; + ctxt->user_regs.rsi = true; /* entry == true */ + } #endif ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h index c7c2d89efd76..963d62a35c82 100644 --- a/arch/x86/xen/smp.h +++ b/arch/x86/xen/smp.h @@ -8,4 +8,12 @@ extern void xen_send_IPI_allbutself(int vector); extern void xen_send_IPI_all(int vector); extern void xen_send_IPI_self(int vector); +#ifdef CONFIG_XEN_PVH +extern void xen_pvh_early_cpu_init(int cpu, bool entry); +#else +static inline void xen_pvh_early_cpu_init(int cpu, bool entry) +{ +} +#endif + #endif diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 485b69585540..674b222544b7 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -47,6 +47,41 @@ ENTRY(startup_xen) __FINIT +#ifdef CONFIG_XEN_PVH +/* + * xen_pvh_early_cpu_init() - early PVH VCPU initialization + * @cpu: this cpu number (%rdi) + * @entry: true if this is a secondary vcpu coming up on this entry + * point, false if this is the boot CPU being initialized for + * the first time (%rsi) + * + * Note: This is called as a function on the boot CPU, and is the entry point + * on the secondary CPU. + */ +ENTRY(xen_pvh_early_cpu_init) + mov %rsi, %r11 + + /* Gather features to see if NX implemented. */ + mov $0x80000001, %eax + cpuid + mov %edx, %esi + + mov $MSR_EFER, %ecx + rdmsr + bts $_EFER_SCE, %eax + + bt $20, %esi + jnc 1f /* No NX, skip setting it */ + bts $_EFER_NX, %eax +1: wrmsr +#ifdef CONFIG_SMP + cmp $0, %r11b + jne cpu_bringup_and_idle +#endif + ret + +#endif /* CONFIG_XEN_PVH */ + .pushsection .text .balign PAGE_SIZE ENTRY(hypercall_page) @@ -124,6 +159,7 @@ NEXT_HYPERCALL(arch_6) ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad _PAGE_PRESENT; .quad _PAGE_PRESENT) ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) + ELFNOTE(Xen, XEN_ELFNOTE_MOD_START_PFN, .long 1) ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, _ASM_PTR __HYPERVISOR_VIRT_START) ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, _ASM_PTR 0) diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index e5103b47a8ce..00b7d46b35b8 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -47,7 +47,7 @@ * * Atomically reads the value of @v. */ -#define atomic_read(v) (*(volatile int *)&(v)->counter) +#define atomic_read(v) ACCESS_ONCE((v)->counter) /** * atomic_set - set atomic variable @@ -58,165 +58,96 @@ */ #define atomic_set(v,i) ((v)->counter = (i)) -/** - * atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v. - */ -static inline void atomic_add(int i, atomic_t * v) -{ #if XCHAL_HAVE_S32C1I - unsigned long tmp; - int result; - - __asm__ __volatile__( - "1: l32i %1, %3, 0\n" - " wsr %1, scompare1\n" - " add %0, %1, %2\n" - " s32c1i %0, %3, 0\n" - " bne %0, %1, 1b\n" - : "=&a" (result), "=&a" (tmp) - : "a" (i), "a" (v) - : "memory" - ); -#else - unsigned int vval; - - __asm__ __volatile__( - " rsil a15, "__stringify(LOCKLEVEL)"\n" - " l32i %0, %2, 0\n" - " add %0, %0, %1\n" - " s32i %0, %2, 0\n" - " wsr a15, ps\n" - " rsync\n" - : "=&a" (vval) - : "a" (i), "a" (v) - : "a15", "memory" - ); -#endif -} - -/** - * atomic_sub - subtract the atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v. - */ -static inline void atomic_sub(int i, atomic_t *v) -{ -#if XCHAL_HAVE_S32C1I - unsigned long tmp; - int result; - - __asm__ __volatile__( - "1: l32i %1, %3, 0\n" - " wsr %1, scompare1\n" - " sub %0, %1, %2\n" - " s32c1i %0, %3, 0\n" - " bne %0, %1, 1b\n" - : "=&a" (result), "=&a" (tmp) - : "a" (i), "a" (v) - : "memory" - ); -#else - unsigned int vval; - - __asm__ __volatile__( - " rsil a15, "__stringify(LOCKLEVEL)"\n" - " l32i %0, %2, 0\n" - " sub %0, %0, %1\n" - " s32i %0, %2, 0\n" - " wsr a15, ps\n" - " rsync\n" - : "=&a" (vval) - : "a" (i), "a" (v) - : "a15", "memory" - ); -#endif +#define ATOMIC_OP(op) \ +static inline void atomic_##op(int i, atomic_t * v) \ +{ \ + unsigned long tmp; \ + int result; \ + \ + __asm__ __volatile__( \ + "1: l32i %1, %3, 0\n" \ + " wsr %1, scompare1\n" \ + " " #op " %0, %1, %2\n" \ + " s32c1i %0, %3, 0\n" \ + " bne %0, %1, 1b\n" \ + : "=&a" (result), "=&a" (tmp) \ + : "a" (i), "a" (v) \ + : "memory" \ + ); \ +} \ + +#define ATOMIC_OP_RETURN(op) \ +static inline int atomic_##op##_return(int i, atomic_t * v) \ +{ \ + unsigned long tmp; \ + int result; \ + \ + __asm__ __volatile__( \ + "1: l32i %1, %3, 0\n" \ + " wsr %1, scompare1\n" \ + " " #op " %0, %1, %2\n" \ + " s32c1i %0, %3, 0\n" \ + " bne %0, %1, 1b\n" \ + " " #op " %0, %0, %2\n" \ + : "=&a" (result), "=&a" (tmp) \ + : "a" (i), "a" (v) \ + : "memory" \ + ); \ + \ + return result; \ } -/* - * We use atomic_{add|sub}_return to define other functions. - */ - -static inline int atomic_add_return(int i, atomic_t * v) -{ -#if XCHAL_HAVE_S32C1I - unsigned long tmp; - int result; - - __asm__ __volatile__( - "1: l32i %1, %3, 0\n" - " wsr %1, scompare1\n" - " add %0, %1, %2\n" - " s32c1i %0, %3, 0\n" - " bne %0, %1, 1b\n" - " add %0, %0, %2\n" - : "=&a" (result), "=&a" (tmp) - : "a" (i), "a" (v) - : "memory" - ); - - return result; -#else - unsigned int vval; - - __asm__ __volatile__( - " rsil a15,"__stringify(LOCKLEVEL)"\n" - " l32i %0, %2, 0\n" - " add %0, %0, %1\n" - " s32i %0, %2, 0\n" - " wsr a15, ps\n" - " rsync\n" - : "=&a" (vval) - : "a" (i), "a" (v) - : "a15", "memory" - ); - - return vval; -#endif +#else /* XCHAL_HAVE_S32C1I */ + +#define ATOMIC_OP(op) \ +static inline void atomic_##op(int i, atomic_t * v) \ +{ \ + unsigned int vval; \ + \ + __asm__ __volatile__( \ + " rsil a15, "__stringify(LOCKLEVEL)"\n"\ + " l32i %0, %2, 0\n" \ + " " #op " %0, %0, %1\n" \ + " s32i %0, %2, 0\n" \ + " wsr a15, ps\n" \ + " rsync\n" \ + : "=&a" (vval) \ + : "a" (i), "a" (v) \ + : "a15", "memory" \ + ); \ +} \ + +#define ATOMIC_OP_RETURN(op) \ +static inline int atomic_##op##_return(int i, atomic_t * v) \ +{ \ + unsigned int vval; \ + \ + __asm__ __volatile__( \ + " rsil a15,"__stringify(LOCKLEVEL)"\n" \ + " l32i %0, %2, 0\n" \ + " " #op " %0, %0, %1\n" \ + " s32i %0, %2, 0\n" \ + " wsr a15, ps\n" \ + " rsync\n" \ + : "=&a" (vval) \ + : "a" (i), "a" (v) \ + : "a15", "memory" \ + ); \ + \ + return vval; \ } -static inline int atomic_sub_return(int i, atomic_t * v) -{ -#if XCHAL_HAVE_S32C1I - unsigned long tmp; - int result; +#endif /* XCHAL_HAVE_S32C1I */ - __asm__ __volatile__( - "1: l32i %1, %3, 0\n" - " wsr %1, scompare1\n" - " sub %0, %1, %2\n" - " s32c1i %0, %3, 0\n" - " bne %0, %1, 1b\n" - " sub %0, %0, %2\n" - : "=&a" (result), "=&a" (tmp) - : "a" (i), "a" (v) - : "memory" - ); +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) - return result; -#else - unsigned int vval; - - __asm__ __volatile__( - " rsil a15,"__stringify(LOCKLEVEL)"\n" - " l32i %0, %2, 0\n" - " sub %0, %0, %1\n" - " s32i %0, %2, 0\n" - " wsr a15, ps\n" - " rsync\n" - : "=&a" (vval) - : "a" (i), "a" (v) - : "a15", "memory" - ); +ATOMIC_OPS(add) +ATOMIC_OPS(sub) - return vval; -#endif -} +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP /** * atomic_sub_and_test - subtract value from variable and test result |