summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorMadhavan Srinivasan <maddy@linux.vnet.ibm.com>2017-12-20 04:55:57 +0100
committerMichael Ellerman <mpe@ellerman.id.au>2018-01-19 12:37:04 +0100
commit6cd74d2b6174957103c8f6f2cfcefe086a51e6eb (patch)
tree8f19bc213be4efadc4c1d6eb2845d8c6e6fa0baa /arch
parentpowerpc: use generic atomic implementation for local_t (diff)
downloadlinux-6cd74d2b6174957103c8f6f2cfcefe086a51e6eb.tar.xz
linux-6cd74d2b6174957103c8f6f2cfcefe086a51e6eb.zip
powerpc/64s: Implement local_t using irq soft masking
local_t is used for atomic modifications for per-CPU data, versus re-entrant modifications via interrupts. local_t read-modify-write atomic operations are currently implemented with hardware atomics (larx/stcx), which are quite slow. This patch implements them by masking all types of interrupts that may do local_t operations ("standard" and perf interrupts). Rusty's benchmark (https://lkml.org/lkml/2008/12/16/450) gives the following timings for the local_t test, in nanoseconds per iteration: larx/stcx irq+pmu disable _inc 38 10 _add 38 10 _read 4 4 _add_return 38 10 There are still some interrupt types (system reset, machine check, and watchdog), which can not safely use local_t operations, because they are not masked. An alternative approach was proposed, using a CR bit to mark a critical section, which is tested in the interrupt return path, and would then branch to a fixup handler (similar to exception fixups), which re-starts the operation. The problem with this was the complexity of the fixup handler and the latency of the slow path. https://lists.ozlabs.org/pipermail/linuxppc-dev/2014-November/123024.html Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/include/asm/local.h141
1 files changed, 141 insertions, 0 deletions
diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
index cfbcc31d43ad..fdd00939270b 100644
--- a/arch/powerpc/include/asm/local.h
+++ b/arch/powerpc/include/asm/local.h
@@ -2,6 +2,147 @@
#ifndef _ARCH_POWERPC_LOCAL_H
#define _ARCH_POWERPC_LOCAL_H
+#ifdef CONFIG_PPC_BOOK3S_64
+
+#include <linux/percpu.h>
+#include <linux/atomic.h>
+#include <linux/irqflags.h>
+
+#include <asm/hw_irq.h>
+
+typedef struct
+{
+ long v;
+} local_t;
+
+#define LOCAL_INIT(i) { (i) }
+
+static __inline__ long local_read(local_t *l)
+{
+ return READ_ONCE(l->v);
+}
+
+static __inline__ void local_set(local_t *l, long i)
+{
+ WRITE_ONCE(l->v, i);
+}
+
+#define LOCAL_OP(op, c_op) \
+static __inline__ void local_##op(long i, local_t *l) \
+{ \
+ unsigned long flags; \
+ \
+ powerpc_local_irq_pmu_save(flags); \
+ l->v c_op i; \
+ powerpc_local_irq_pmu_restore(flags); \
+}
+
+#define LOCAL_OP_RETURN(op, c_op) \
+static __inline__ long local_##op##_return(long a, local_t *l) \
+{ \
+ long t; \
+ unsigned long flags; \
+ \
+ powerpc_local_irq_pmu_save(flags); \
+ t = (l->v c_op a); \
+ powerpc_local_irq_pmu_restore(flags); \
+ \
+ return t; \
+}
+
+#define LOCAL_OPS(op, c_op) \
+ LOCAL_OP(op, c_op) \
+ LOCAL_OP_RETURN(op, c_op)
+
+LOCAL_OPS(add, +=)
+LOCAL_OPS(sub, -=)
+
+#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)
+#define local_inc_return(l) local_add_return(1LL, l)
+#define local_inc(l) local_inc_return(l)
+
+/*
+ * local_inc_and_test - increment and test
+ * @l: pointer of type local_t
+ *
+ * Atomically increments @l by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#define local_inc_and_test(l) (local_inc_return(l) == 0)
+
+#define local_dec_return(l) local_sub_return(1LL, l)
+#define local_dec(l) local_dec_return(l)
+#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0)
+#define local_dec_and_test(l) (local_dec_return((l)) == 0)
+
+static __inline__ long local_cmpxchg(local_t *l, long o, long n)
+{
+ long t;
+ unsigned long flags;
+
+ powerpc_local_irq_pmu_save(flags);
+ t = l->v;
+ if (t == o)
+ l->v = n;
+ powerpc_local_irq_pmu_restore(flags);
+
+ return t;
+}
+
+static __inline__ long local_xchg(local_t *l, long n)
+{
+ long t;
+ unsigned long flags;
+
+ powerpc_local_irq_pmu_save(flags);
+ t = l->v;
+ l->v = n;
+ powerpc_local_irq_pmu_restore(flags);
+
+ return t;
+}
+
+/**
+ * local_add_unless - add unless the number is a given value
+ * @l: pointer of type local_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @l, so long as it was not @u.
+ * Returns non-zero if @l was not @u, and zero otherwise.
+ */
+static __inline__ int local_add_unless(local_t *l, long a, long u)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ powerpc_local_irq_pmu_save(flags);
+ if (l->v != u) {
+ l->v += a;
+ ret = 1;
+ }
+ powerpc_local_irq_pmu_restore(flags);
+
+ return ret;
+}
+
+#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
+
+/* Use these for per-cpu local_t variables: on some archs they are
+ * much more efficient than these naive implementations. Note they take
+ * a variable, not an address.
+ */
+
+#define __local_inc(l) ((l)->v++)
+#define __local_dec(l) ((l)->v++)
+#define __local_add(i,l) ((l)->v+=(i))
+#define __local_sub(i,l) ((l)->v-=(i))
+
+#else /* CONFIG_PPC64 */
+
#include <asm-generic/local.h>
+#endif /* CONFIG_PPC64 */
+
#endif /* _ARCH_POWERPC_LOCAL_H */