From 0a456fc58fb8ef3c53d18297ab5cd5d2a70d146b Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Wed, 1 Jul 2009 13:07:01 +1000
Subject: powerpc/perf_counter: Enable alternate PR/HV bits for POWER7

POWER7 has the same PR/HV bit layout as POWER6, so set the flag.

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Paul Mackerras <paulus@samba.org>
Cc: a.p.zijlstra@chello.nl
Cc: benh@kernel.crashing.org
LKML-Reference: <20090701030701.GI3563@kryten>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/power7-pmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 5d755ef7ac8f..5a9f5cbd40a4 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -358,6 +358,7 @@ static struct power_pmu power7_pmu = {
 	.get_constraint		= power7_get_constraint,
 	.get_alternatives	= power7_get_alternatives,
 	.disable_pmc		= power7_disable_pmc,
+	.flags			= PPMU_ALT_SIPR,
 	.n_generic		= ARRAY_SIZE(power7_generic_events),
 	.generic_events		= power7_generic_events,
 	.cache_events		= &power7_cache_events,
-- 
cgit v1.2.3


From 0406ca6d8e849d9dd027c8cb6791448e81411aef Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 1 Jul 2009 21:02:09 +0200
Subject: perf_counter: Ignore the nmi call frames in the x86-64 backtraces

Almost every callchain recorded with perf record includes the
internal perf counter NMI frames:

 perf_callchain
 perf_counter_overflow
 intel_pmu_handle_irq
 perf_counter_nmi_handler
 notifier_call_chain
 atomic_notifier_call_chain
 notify_die
 do_nmi
 nmi

We want to ignore these frames as they are not interesting for
instrumentation. To solve this, we simply ignore every frame
from NMI context.
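
As a rough illustration of the scheme (a standalone toy model, not
the kernel code - the struct and function names below are made up),
the stack walker's per-stack callback raises a flag whenever the
current stack is the NMI stack, and the per-address callback drops
every frame seen while that flag is set:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct frame {
	const char *stack_name;		/* which stack this frame lives on */
	unsigned long addr;
};

static bool in_nmi_frame;

/* Called each time the walker switches to a new stack */
static void enter_stack(const char *name)
{
	in_nmi_frame = (strcmp(name, "NMI") == 0);
}

/* Called for every return address found */
static void record_address(unsigned long addr)
{
	if (in_nmi_frame)
		return;			/* drop the perf NMI plumbing */
	printf("  %#lx\n", addr);
}

int main(void)
{
	static const struct frame trace[] = {
		{ "NMI",     0x1000 },	/* perf_callchain .. nmi: dropped */
		{ "NMI",     0x1008 },
		{ "process", 0x2000 },	/* the interrupted code: kept */
		{ "process", 0x2010 },
	};
	const char *cur = "";
	size_t i;

	for (i = 0; i < sizeof(trace) / sizeof(trace[0]); i++) {
		if (strcmp(cur, trace[i].stack_name) != 0) {
			cur = trace[i].stack_name;
			enter_stack(cur);
		}
		record_address(trace[i].addr);
	}
	return 0;
}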

New example of "perf report -s sym -c" after this patch:

9.59%  [k] search_by_key
             4.88%
                search_by_key
                reiserfs_read_locked_inode
                reiserfs_iget
                reiserfs_lookup
                do_lookup
                __link_path_walk
                path_walk
                do_path_lookup
                user_path_at
                vfs_fstatat
                vfs_lstat
                sys_newlstat
                system_call_fastpath
                __lxstat
                0x406fb1

             3.19%
                search_by_key
                search_by_entry_key
                reiserfs_find_entry
                reiserfs_lookup
                do_lookup
                __link_path_walk
                path_walk
                do_path_lookup
                user_path_at
                vfs_fstatat
                vfs_lstat
                sys_newlstat
                system_call_fastpath
                __lxstat
                0x406fb1
[...]

For now this patch only solves the problem on x86-64.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1246474930-6088-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/stacktrace.h  |  2 ++
 arch/x86/kernel/cpu/perf_counter.c |  8 +++++++-
 arch/x86/kernel/dumpstack_32.c     |  6 ++++++
 arch/x86/kernel/dumpstack_64.c     | 22 +++++++++++++++-------
 4 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index f517944b2b17..cf86a5e73815 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -3,6 +3,8 @@
 
 extern int kstack_depth_to_print;
 
+int x86_is_stack_id(int id, char *name);
+
 /* Generic stack tracer with callbacks */
 
 struct stacktrace_ops {
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d4cf4ce19aac..36c3dc7b8991 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1561,6 +1561,7 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
 
 static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
 static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
+static DEFINE_PER_CPU(int, in_nmi_frame);
 
 
 static void
@@ -1576,7 +1577,9 @@ static void backtrace_warning(void *data, char *msg)
 
 static int backtrace_stack(void *data, char *name)
 {
-	/* Process all stacks: */
+	per_cpu(in_nmi_frame, smp_processor_id()) =
+			x86_is_stack_id(NMI_STACK, name);
+
 	return 0;
 }
 
@@ -1584,6 +1587,9 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 {
 	struct perf_callchain_entry *entry = data;
 
+	if (per_cpu(in_nmi_frame, smp_processor_id()))
+		return;
+
 	if (reliable)
 		callchain_store(entry, addr);
 }
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index d593cd1f58dc..bca5fba91c9e 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -19,6 +19,12 @@
 
 #include "dumpstack.h"
 
+/* Just a stub for now */
+int x86_is_stack_id(int id, char *name)
+{
+	return 0;
+}
+
 void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		unsigned long *stack, unsigned long bp,
 		const struct stacktrace_ops *ops, void *data)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index d35db5993fd6..54b0a3276766 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -19,10 +19,8 @@
 
 #include "dumpstack.h"
 
-static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
-					unsigned *usedp, char **idp)
-{
-	static char ids[][8] = {
+
+static char x86_stack_ids[][8] = {
 		[DEBUG_STACK - 1] = "#DB",
 		[NMI_STACK - 1] = "NMI",
 		[DOUBLEFAULT_STACK - 1] = "#DF",
@@ -33,6 +31,15 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 			N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
 #endif
 	};
+
+int x86_is_stack_id(int id, char *name)
+{
+	return x86_stack_ids[id - 1] == name;
+}
+
+static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
+					unsigned *usedp, char **idp)
+{
 	unsigned k;
 
 	/*
@@ -61,7 +68,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 			if (*usedp & (1U << k))
 				break;
 			*usedp |= 1U << k;
-			*idp = ids[k];
+			*idp = x86_stack_ids[k];
 			return (unsigned long *)end;
 		}
 		/*
@@ -81,12 +88,13 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 			do {
 				++j;
 				end -= EXCEPTION_STKSZ;
-				ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
+				x86_stack_ids[j][4] = '1' +
+						(j - N_EXCEPTION_STACKS);
 			} while (stack < end - EXCEPTION_STKSZ);
 			if (*usedp & (1U << j))
 				break;
 			*usedp |= 1U << j;
-			*idp = ids[j];
+			*idp = x86_stack_ids[j];
 			return (unsigned long *)end;
 		}
 #endif
-- 
cgit v1.2.3


From bbf2a330d92c5afccfd17592ba9ccd50f41cf748 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 3 Jul 2009 00:08:26 +0200
Subject: x86: atomic64: The atomic64_t data type should be 8 bytes aligned on
 32-bit too

Locked instructions on two cache lines at once are painful. If
atomic64_t uses two cache lines, my test program is 10x slower.

The chance of that is significant: 4/32, or 12.5%.

Make sure an atomic64_t is 8 bytes aligned.
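
A minimal userspace sketch of the hazard (illustration only, meant
to be built with gcc -m32; the struct names are made up): with the
default i386 ABI a 64-bit counter is only 4-byte aligned inside a
struct, so it can straddle two cache lines, while
__attribute__((aligned(8))) - what the kernel's __aligned(8)
expands to - restores natural alignment:

#include <stdio.h>
#include <stddef.h>

/* Default i386 ABI: unsigned long long is only 4-byte aligned in structs */
struct counter_unaligned {
	int pad;
	unsigned long long counter;
};

/* Mirrors the fix: force natural 8-byte alignment of the 64-bit counter */
struct counter_aligned {
	int pad;
	unsigned long long __attribute__((aligned(8))) counter;
};

int main(void)
{
	/*
	 * On a 32-bit build the first offset is 4 and the second is 8.
	 * A 4-byte-aligned 8-byte counter that happens to start 4 bytes
	 * before a cache-line boundary is split across two lines, so a
	 * locked cmpxchg8b on it has to lock both lines.
	 */
	printf("unaligned counter offset: %zu\n",
	       offsetof(struct counter_unaligned, counter));
	printf("aligned counter offset:   %zu\n",
	       offsetof(struct counter_aligned, counter));
	return 0;
}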

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
LKML-Reference: <alpine.LFD.2.01.0907021653030.3210@localhost.localdomain>
[ changed it to __aligned(8) as per Andrew's suggestion ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/atomic_32.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index 2503d4e64c2a..ae0fbb5b0578 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -250,7 +250,7 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 /* An 64bit atomic type */
 
 typedef struct {
-	unsigned long long counter;
+	unsigned long long __aligned(8) counter;
 } atomic64_t;
 
 #define ATOMIC64_INIT(val)	{ (val) }
-- 
cgit v1.2.3


From b7882b7c65abb00194bdb3d4a22d27d70fcc59ba Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 13:26:39 +0200
Subject: x86: atomic64: Move the 32-bit atomic64_t implementation to a .c file

Linus noted that the atomic64_t primitives are currently all
inlines, which is crazy because these functions have a large
register footprint anyway.

Move them to a separate file: arch/x86/lib/atomic64_32.c

Also, while at it, rename all uses of 'unsigned long long' to
the much shorter u64.

This makes the prototypes look a lot nicer - and it also
uncovered a few bugs where (as yet unused) API variants had
'long' as their return type instead of u64.

[ More intrusive changes are not yet done in this patch. ]

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
LKML-Reference: <alpine.LFD.2.01.0907021653030.3210@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/atomic_32.h | 137 ++++---------------------
 arch/x86/lib/Makefile            |   1 +
 arch/x86/lib/atomic64_32.c       | 216 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 237 insertions(+), 117 deletions(-)
 create mode 100644 arch/x86/lib/atomic64_32.c

diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index ae0fbb5b0578..311a43e47c0b 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -250,7 +250,7 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 /* An 64bit atomic type */
 
 typedef struct {
-	unsigned long long __aligned(8) counter;
+	u64 __aligned(8) counter;
 } atomic64_t;
 
 #define ATOMIC64_INIT(val)	{ (val) }
@@ -264,31 +264,7 @@ typedef struct {
  */
 #define __atomic64_read(ptr)		((ptr)->counter)
 
-static inline unsigned long long
-cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new)
-{
-	asm volatile(
-
-		LOCK_PREFIX "cmpxchg8b (%[ptr])\n"
-
-		     :		"=A" (old)
-
-		     : [ptr]	"D" (ptr),
-				"A" (old),
-				"b" (ll_low(new)),
-				"c" (ll_high(new))
-
-		     : "memory");
-
-	return old;
-}
-
-static inline unsigned long long
-atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
-		 unsigned long long new_val)
-{
-	return cmpxchg8b(&ptr->counter, old_val, new_val);
-}
+extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val);
 
 /**
  * atomic64_xchg - xchg atomic64 variable
@@ -298,18 +274,7 @@ atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
  * Atomically xchgs the value of @ptr to @new_val and returns
  * the old value.
  */
-
-static inline unsigned long long
-atomic64_xchg(atomic64_t *ptr, unsigned long long new_val)
-{
-	unsigned long long old_val;
-
-	do {
-		old_val = atomic_read(ptr);
-	} while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
-
-	return old_val;
-}
+extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val);
 
 /**
  * atomic64_set - set atomic64 variable
@@ -318,10 +283,7 @@ atomic64_xchg(atomic64_t *ptr, unsigned long long new_val)
  *
  * Atomically sets the value of @ptr to @new_val.
  */
-static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val)
-{
-	atomic64_xchg(ptr, new_val);
-}
+extern void atomic64_set(atomic64_t *ptr, u64 new_val);
 
 /**
  * atomic64_read - read atomic64 variable
@@ -329,16 +291,7 @@ static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val)
  *
  * Atomically reads the value of @ptr and returns it.
  */
-static inline unsigned long long atomic64_read(atomic64_t *ptr)
-{
-	unsigned long long curr_val;
-
-	do {
-		curr_val = __atomic64_read(ptr);
-	} while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val);
-
-	return curr_val;
-}
+extern u64 atomic64_read(atomic64_t *ptr);
 
 /**
  * atomic64_add_return - add and return
@@ -347,34 +300,14 @@ static inline unsigned long long atomic64_read(atomic64_t *ptr)
  *
  * Atomically adds @delta to @ptr and returns @delta + *@ptr
  */
-static inline unsigned long long
-atomic64_add_return(unsigned long long delta, atomic64_t *ptr)
-{
-	unsigned long long old_val, new_val;
-
-	do {
-		old_val = atomic_read(ptr);
-		new_val = old_val + delta;
-
-	} while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
-
-	return new_val;
-}
-
-static inline long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr)
-{
-	return atomic64_add_return(-delta, ptr);
-}
+extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr);
 
-static inline long atomic64_inc_return(atomic64_t *ptr)
-{
-	return atomic64_add_return(1, ptr);
-}
-
-static inline long atomic64_dec_return(atomic64_t *ptr)
-{
-	return atomic64_sub_return(1, ptr);
-}
+/*
+ * Other variants with different arithmetic operators:
+ */
+extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr);
+extern u64 atomic64_inc_return(atomic64_t *ptr);
+extern u64 atomic64_dec_return(atomic64_t *ptr);
 
 /**
  * atomic64_add - add integer to atomic64 variable
@@ -383,10 +316,7 @@ static inline long atomic64_dec_return(atomic64_t *ptr)
  *
  * Atomically adds @delta to @ptr.
  */
-static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr)
-{
-	atomic64_add_return(delta, ptr);
-}
+extern void atomic64_add(u64 delta, atomic64_t *ptr);
 
 /**
  * atomic64_sub - subtract the atomic64 variable
@@ -395,10 +325,7 @@ static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr)
  *
  * Atomically subtracts @delta from @ptr.
  */
-static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr)
-{
-	atomic64_add(-delta, ptr);
-}
+extern void atomic64_sub(u64 delta, atomic64_t *ptr);
 
 /**
  * atomic64_sub_and_test - subtract value from variable and test result
@@ -409,13 +336,7 @@ static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr)
  * true if the result is zero, or false for all
  * other cases.
  */
-static inline int
-atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr)
-{
-	unsigned long long old_val = atomic64_sub_return(delta, ptr);
-
-	return old_val == 0;
-}
+extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr);
 
 /**
  * atomic64_inc - increment atomic64 variable
@@ -423,10 +344,7 @@ atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr)
  *
  * Atomically increments @ptr by 1.
  */
-static inline void atomic64_inc(atomic64_t *ptr)
-{
-	atomic64_add(1, ptr);
-}
+extern void atomic64_inc(atomic64_t *ptr);
 
 /**
  * atomic64_dec - decrement atomic64 variable
@@ -434,10 +352,7 @@ static inline void atomic64_inc(atomic64_t *ptr)
  *
  * Atomically decrements @ptr by 1.
  */
-static inline void atomic64_dec(atomic64_t *ptr)
-{
-	atomic64_sub(1, ptr);
-}
+extern void atomic64_dec(atomic64_t *ptr);
 
 /**
  * atomic64_dec_and_test - decrement and test
@@ -447,10 +362,7 @@ static inline void atomic64_dec(atomic64_t *ptr)
  * returns true if the result is 0, or false for all other
  * cases.
  */
-static inline int atomic64_dec_and_test(atomic64_t *ptr)
-{
-	return atomic64_sub_and_test(1, ptr);
-}
+extern int atomic64_dec_and_test(atomic64_t *ptr);
 
 /**
  * atomic64_inc_and_test - increment and test
@@ -460,10 +372,7 @@ static inline int atomic64_dec_and_test(atomic64_t *ptr)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-static inline int atomic64_inc_and_test(atomic64_t *ptr)
-{
-	return atomic64_sub_and_test(-1, ptr);
-}
+extern int atomic64_inc_and_test(atomic64_t *ptr);
 
 /**
  * atomic64_add_negative - add and test if negative
@@ -474,13 +383,7 @@ static inline int atomic64_inc_and_test(atomic64_t *ptr)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static inline int
-atomic64_add_negative(unsigned long long delta, atomic64_t *ptr)
-{
-	long long old_val = atomic64_add_return(delta, ptr);
-
-	return old_val < 0;
-}
+extern int atomic64_add_negative(u64 delta, atomic64_t *ptr);
 
 #include <asm-generic/atomic-long.h>
 #endif /* _ASM_X86_ATOMIC_32_H */
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index f9d35632666b..c3c657c8bb83 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -10,6 +10,7 @@ lib-y += usercopy_$(BITS).o getuser.o putuser.o
 lib-y += memcpy_$(BITS).o
 
 ifeq ($(CONFIG_X86_32),y)
+        lib-y += atomic64_32.o
         lib-y += checksum_32.o
         lib-y += strstr_32.o
         lib-y += semaphore_32.o string_32.o
diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c
new file mode 100644
index 000000000000..d21e725d3d84
--- /dev/null
+++ b/arch/x86/lib/atomic64_32.c
@@ -0,0 +1,216 @@
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/processor.h>
+#include <asm/cmpxchg.h>
+#include <asm/atomic.h>
+
+static inline u64 cmpxchg8b(u64 *ptr, u64 old, u64 new)
+{
+	asm volatile(
+
+		LOCK_PREFIX "cmpxchg8b (%[ptr])\n"
+
+		     :		"=A" (old)
+
+		     : [ptr]	"D" (ptr),
+				"A" (old),
+				"b" (ll_low(new)),
+				"c" (ll_high(new))
+
+		     : "memory");
+
+	return old;
+}
+
+u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val)
+{
+	return cmpxchg8b(&ptr->counter, old_val, new_val);
+}
+
+/**
+ * atomic64_xchg - xchg atomic64 variable
+ * @ptr:      pointer to type atomic64_t
+ * @new_val:  value to assign
+ *
+ * Atomically xchgs the value of @ptr to @new_val and returns
+ * the old value.
+ */
+
+u64 atomic64_xchg(atomic64_t *ptr, u64 new_val)
+{
+	u64 old_val;
+
+	do {
+		old_val = atomic_read(ptr);
+	} while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
+
+	return old_val;
+}
+
+/**
+ * atomic64_set - set atomic64 variable
+ * @ptr:      pointer to type atomic64_t
+ * @new_val:  value to assign
+ *
+ * Atomically sets the value of @ptr to @new_val.
+ */
+void atomic64_set(atomic64_t *ptr, u64 new_val)
+{
+	atomic64_xchg(ptr, new_val);
+}
+
+/**
+ * atomic64_read - read atomic64 variable
+ * @ptr:      pointer to type atomic64_t
+ *
+ * Atomically reads the value of @ptr and returns it.
+ */
+u64 atomic64_read(atomic64_t *ptr)
+{
+	u64 curr_val;
+
+	do {
+		curr_val = __atomic64_read(ptr);
+	} while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val);
+
+	return curr_val;
+}
+
+/**
+ * atomic64_add_return - add and return
+ * @delta: integer value to add
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr and returns @delta + *@ptr
+ */
+u64 atomic64_add_return(u64 delta, atomic64_t *ptr)
+{
+	u64 old_val, new_val;
+
+	do {
+		old_val = atomic_read(ptr);
+		new_val = old_val + delta;
+
+	} while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
+
+	return new_val;
+}
+
+u64 atomic64_sub_return(u64 delta, atomic64_t *ptr)
+{
+	return atomic64_add_return(-delta, ptr);
+}
+
+u64 atomic64_inc_return(atomic64_t *ptr)
+{
+	return atomic64_add_return(1, ptr);
+}
+
+u64 atomic64_dec_return(atomic64_t *ptr)
+{
+	return atomic64_sub_return(1, ptr);
+}
+
+/**
+ * atomic64_add - add integer to atomic64 variable
+ * @delta: integer value to add
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr.
+ */
+void atomic64_add(u64 delta, atomic64_t *ptr)
+{
+	atomic64_add_return(delta, ptr);
+}
+
+/**
+ * atomic64_sub - subtract the atomic64 variable
+ * @delta: integer value to subtract
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically subtracts @delta from @ptr.
+ */
+void atomic64_sub(u64 delta, atomic64_t *ptr)
+{
+	atomic64_add(-delta, ptr);
+}
+
+/**
+ * atomic64_sub_and_test - subtract value from variable and test result
+ * @delta: integer value to subtract
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically subtracts @delta from @ptr and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+int atomic64_sub_and_test(u64 delta, atomic64_t *ptr)
+{
+	u64 old_val = atomic64_sub_return(delta, ptr);
+
+	return old_val == 0;
+}
+
+/**
+ * atomic64_inc - increment atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically increments @ptr by 1.
+ */
+void atomic64_inc(atomic64_t *ptr)
+{
+	atomic64_add(1, ptr);
+}
+
+/**
+ * atomic64_dec - decrement atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically decrements @ptr by 1.
+ */
+void atomic64_dec(atomic64_t *ptr)
+{
+	atomic64_sub(1, ptr);
+}
+
+/**
+ * atomic64_dec_and_test - decrement and test
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically decrements @ptr by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+int atomic64_dec_and_test(atomic64_t *ptr)
+{
+	return atomic64_sub_and_test(1, ptr);
+}
+
+/**
+ * atomic64_inc_and_test - increment and test
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically increments @ptr by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+int atomic64_inc_and_test(atomic64_t *ptr)
+{
+	return atomic64_sub_and_test(-1, ptr);
+}
+
+/**
+ * atomic64_add_negative - add and test if negative
+ * @delta: integer value to add
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+int atomic64_add_negative(u64 delta, atomic64_t *ptr)
+{
+	long long old_val = atomic64_add_return(delta, ptr);
+
+	return old_val < 0;
+}
-- 
cgit v1.2.3


From aacf682fd8c66b57383c407eecd9d4a28264ee91 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 3 Jul 2009 12:14:27 +0200
Subject: x86: atomic64: Improve atomic64_read()

Linus noticed that the 32-bit version of atomic64_read() was
overly complex: it re-read the value and ran a retry loop
around it.

Instead we can just rely on cmpxchg8b leaving the current value
of the memory location in edx:eax, whether the compare succeeds
or fails.

We can use any 'old' value, which will be faster as it can be
loaded via immediates. Picking a value that is unlikely to match
the real value in memory makes the instruction faster as well.

This also has the advantage that the CPU could avoid dirtying
the cacheline.
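
The same idea can be sketched in portable C with a GCC builtin
(illustration only, not the code in this patch; a 64-bit
compare-and-swap on 32-bit x86 needs at least -march=i586 so that
gcc can emit cmpxchg8b):

#include <stdint.h>
#include <stdio.h>

/*
 * __sync_val_compare_and_swap() returns the value that was in memory,
 * whether or not the swap happened.  Passing the same 'unlikely' value
 * as both old and new therefore yields an atomic 64-bit read and never
 * changes memory: if the compare fails nothing is written, and if it
 * happens to succeed the same value is written back.
 */
static uint64_t atomic64_read_sketch(uint64_t *ptr)
{
	uint64_t unlikely_val = 1ULL << 32;	/* cheap immediate, rarely matches */

	return __sync_val_compare_and_swap(ptr, unlikely_val, unlikely_val);
}

int main(void)
{
	uint64_t v = 0x123456789abcdefULL;

	printf("%llx\n", (unsigned long long)atomic64_read_sketch(&v));
	return 0;
}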

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
LKML-Reference: <alpine.LFD.2.01.0907021653030.3210@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/atomic64_32.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c
index d21e725d3d84..afa5d444918b 100644
--- a/arch/x86/lib/atomic64_32.c
+++ b/arch/x86/lib/atomic64_32.c
@@ -67,13 +67,9 @@ void atomic64_set(atomic64_t *ptr, u64 new_val)
  */
 u64 atomic64_read(atomic64_t *ptr)
 {
-	u64 curr_val;
+	u64 old = 1LL << 32;
 
-	do {
-		curr_val = __atomic64_read(ptr);
-	} while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val);
-
-	return curr_val;
+	return cmpxchg8b(&ptr->counter, old, old);
 }
 
 /**
-- 
cgit v1.2.3


From 69237f94e65d3d7f539f1adb98ef68685c595004 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 3 Jul 2009 13:26:41 +0200
Subject: x86: atomic64: Improve cmpxchg8b()

Rewrite cmpxchg8b() to use a generic "+m" constraint instead of
the %edi register, to increase compiler freedom in code
generation and possibly generate better code.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
LKML-Reference: <alpine.LFD.2.01.0907021653030.3210@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/atomic64_32.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c
index afa5d444918b..5fc1e2caa544 100644
--- a/arch/x86/lib/atomic64_32.c
+++ b/arch/x86/lib/atomic64_32.c
@@ -6,19 +6,14 @@
 
 static inline u64 cmpxchg8b(u64 *ptr, u64 old, u64 new)
 {
-	asm volatile(
-
-		LOCK_PREFIX "cmpxchg8b (%[ptr])\n"
-
-		     :		"=A" (old)
-
-		     : [ptr]	"D" (ptr),
-				"A" (old),
-				"b" (ll_low(new)),
-				"c" (ll_high(new))
-
-		     : "memory");
+	u32 low = new;
+	u32 high = new >> 32;
 
+	asm volatile(
+		LOCK_PREFIX "cmpxchg8b %1\n"
+		     : "+A" (old), "+m" (*ptr)
+		     :  "b" (low),  "c" (high)
+		     );
 	return old;
 }
 
-- 
cgit v1.2.3


From 824975ef190e7dcb77718d1cc2cb53769b16d918 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 12:39:07 +0200
Subject: x86: atomic64: Improve atomic64_add_return()

Linus noted (based on Eric Dumazet's numbers) that we would
probably be better off not trying an atomic_read() in
atomic64_add_return() but instead intentionally letting the
first cmpxchg8b fail - to get a cache-friendly 'give me
ownership of this cacheline' transaction. That can then be
followed by the real cmpxchg8b, which sets the value local
to the CPU.
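
A portable sketch of that loop, using a GCC builtin rather than the
kernel primitives (illustration only - the helper name is made up,
and on 32-bit x86 the builtin again needs at least -march=i586):
start from a guessed old value so the very first compare-and-swap
already requests the cache line for ownership, then feed the value
returned by each failed attempt into the next one:

#include <stdint.h>
#include <stdio.h>

static uint64_t add_return_sketch(uint64_t delta, uint64_t *ptr)
{
	/* Arbitrary first guess (the patch below uses 1ULL << 32) */
	uint64_t old_val, new_val, real_val = 1ULL << 32;

	do {
		old_val = real_val;
		new_val = old_val + delta;
		/* Returns the current memory value; equals old_val on success */
		real_val = __sync_val_compare_and_swap(ptr, old_val, new_val);
	} while (real_val != old_val);

	return new_val;
}

int main(void)
{
	uint64_t v = 40;

	printf("%llu\n", (unsigned long long)add_return_sketch(2, &v));	/* 42 */
	return 0;
}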

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
LKML-Reference: <alpine.LFD.2.01.0907021653030.3210@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/atomic64_32.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c
index 5fc1e2caa544..61959627e1e1 100644
--- a/arch/x86/lib/atomic64_32.c
+++ b/arch/x86/lib/atomic64_32.c
@@ -76,13 +76,22 @@ u64 atomic64_read(atomic64_t *ptr)
  */
 u64 atomic64_add_return(u64 delta, atomic64_t *ptr)
 {
-	u64 old_val, new_val;
+	/*
+	 * Try first with a (probably incorrect) assumption about
+	 * what we have there. We'll do two loops most likely,
+	 * but we'll get an ownership MESI transaction straight away
+	 * instead of a read transaction followed by a
+	 * flush-for-ownership transaction:
+	 */
+	u64 old_val, new_val, real_val = 1ULL << 32;
 
 	do {
-		old_val = atomic_read(ptr);
+		old_val = real_val;
 		new_val = old_val + delta;
 
-	} while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
+		real_val = atomic64_cmpxchg(ptr, old_val, new_val);
+
+	} while (real_val != old_val);
 
 	return new_val;
 }
-- 
cgit v1.2.3


From 3ac805d2afd3fa4a07cb5bcf352fd7fa83f28935 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 12:51:19 +0200
Subject: x86: atomic64: Reduce size of functions

cmpxchg8b is a huge instruction in terms of register footprint;
we almost never want to inline it, not even within the same
code module.

GCC 4.3 still gets this wrong for two functions, underestimating
the true cost of this instruction - so annotate those two key
functions to reduce the bloat:

arch/x86/lib/atomic64_32.o:

   text	   data	    bss	    dec	    hex	filename
   1763	      0	      0	   1763	    6e3	atomic64_32.o.before
    435	      0	      0	    435	    1b3	atomic64_32.o.after

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
LKML-Reference: <alpine.LFD.2.01.0907021653030.3210@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/atomic64_32.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c
index 61959627e1e1..a910238a7760 100644
--- a/arch/x86/lib/atomic64_32.c
+++ b/arch/x86/lib/atomic64_32.c
@@ -4,7 +4,7 @@
 #include <asm/cmpxchg.h>
 #include <asm/atomic.h>
 
-static inline u64 cmpxchg8b(u64 *ptr, u64 old, u64 new)
+static noinline u64 cmpxchg8b(u64 *ptr, u64 old, u64 new)
 {
 	u32 low = new;
 	u32 high = new >> 32;
@@ -74,7 +74,7 @@ u64 atomic64_read(atomic64_t *ptr)
  *
  * Atomically adds @delta to @ptr and returns @delta + *@ptr
  */
-u64 atomic64_add_return(u64 delta, atomic64_t *ptr)
+noinline u64 atomic64_add_return(u64 delta, atomic64_t *ptr)
 {
 	/*
 	 * Try first with a (probably incorrect) assumption about
-- 
cgit v1.2.3


From 3217120873598533234b6dedda9c371ce30001d0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 13:06:01 +0200
Subject: x86: atomic64: Make atomic_read() type-safe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Linus noticed that atomic64_xchg() uses atomic_read(), which
happens to work because atomic_read() is a macro, so the
.counter value gets read as a u64 on 32-bit too - but this is
really bogus and serious bugs are waiting to happen.

Change atomic_read() to be a type-safe inline, and this exposes
the atomic64 bogosity as well:

  arch/x86/lib/atomic64_32.c: In function ‘atomic64_xchg’:
  arch/x86/lib/atomic64_32.c:39: warning: passing argument 1 of ‘atomic_read’ from incompatible pointer type
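
A standalone way to see the difference (illustration only, with
mock types - these are not the kernel definitions; build with
-fno-strict-aliasing, as the kernel does, since the second call is
deliberately bogus): the macro accepts any struct with a .counter
member, while the static inline makes gcc emit exactly the
incompatible-pointer-type warning quoted above:

#include <stdio.h>

typedef struct { int counter; } atomic_t;
typedef struct { long long counter; } atomic64_t;

/* Old style: a macro, so the pointer type is never checked */
#define atomic_read_macro(v)	((v)->counter)

/* New style: a static inline, so the argument type is checked */
static inline int atomic_read_fn(const atomic_t *v)
{
	return v->counter;
}

int main(void)
{
	atomic64_t v64 = { 1LL << 32 };

	/* Accepted silently even though v64 is the wrong type: */
	printf("%lld\n", (long long)atomic_read_macro(&v64));

	/*
	 * Reads only the low 32 bits (on little-endian) - and gcc now
	 * warns (newer versions may even reject it): "passing argument 1
	 * of 'atomic_read_fn' from incompatible pointer type":
	 */
	printf("%d\n", atomic_read_fn(&v64));

	return 0;
}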

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
LKML-Reference: <alpine.LFD.2.01.0907021653030.3210@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/atomic_32.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index 311a43e47c0b..b551bb1ae3cf 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -19,7 +19,10 @@
  *
  * Atomically reads the value of @v.
  */
-#define atomic_read(v)		((v)->counter)
+static inline int atomic_read(const atomic_t *v)
+{
+	return v->counter;
+}
 
 /**
  * atomic_set - set atomic variable
-- 
cgit v1.2.3


From 199e23780a7e75c63a9e3d1108804e3af450ea3e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 13:02:39 +0200
Subject: x86: atomic64: Fix unclean type use in atomic64_xchg()

Linus noticed that atomic64_xchg() uses atomic_read(), which
happens to work because atomic_read() is a macro, so the
.counter value gets read as a u64 on 32-bit too - but this is
really bogus and serious bugs are waiting to happen.

Fix atomic64_xchg() to use __atomic64_read() instead.

No code changed:

arch/x86/lib/atomic64_32.o:

   text	   data	    bss	    dec	    hex	filename
    435	      0	      0	    435	    1b3	atomic64_32.o.before
    435	      0	      0	    435	    1b3	atomic64_32.o.after

md5:
   bd8ab95e69c93518578bfaf0ea3be4d9  atomic64_32.o.before.asm
   bd8ab95e69c93518578bfaf0ea3be4d9  atomic64_32.o.after.asm

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
LKML-Reference: <alpine.LFD.2.01.0907021653030.3210@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/atomic64_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c
index a910238a7760..fd28fd3fb742 100644
--- a/arch/x86/lib/atomic64_32.c
+++ b/arch/x86/lib/atomic64_32.c
@@ -36,7 +36,7 @@ u64 atomic64_xchg(atomic64_t *ptr, u64 new_val)
 	u64 old_val;
 
 	do {
-		old_val = atomic_read(ptr);
+		old_val = __atomic64_read(ptr);
 	} while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
 
 	return old_val;
-- 
cgit v1.2.3


From 8e049ef054f1cc765f05f13e1396bb9a17c19e66 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 2 Jul 2009 08:57:12 +1000
Subject: x86: atomic64: Code atomic(64)_read and atomic(64)_set in C not CPP

Occasionally we get bugs where atomic_read or atomic_set are
used on atomic64_t variables or vice versa.  These bugs don't
generate warnings on x86 because atomic_read and atomic_set are
coded as macros rather than C functions, so we don't get any
type-checking on their arguments; similarly for atomic64_read
and atomic64_set in 64-bit kernels.

This converts them to C functions so that the arguments are
type-checked and bugs like this will get caught more easily. It
also converts atomic_cmpxchg and atomic_xchg, and
atomic64_cmpxchg and atomic64_xchg on 64-bit, so we get
type-checking on their arguments too.

Compiling a typical 64-bit x86 config, this generates no new
warnings, and the vmlinux text is 86 bytes smaller.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
LKML-Reference: <alpine.LFD.2.01.0907021653030.3210@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/atomic_32.h | 16 ++++++++++++---
 arch/x86/include/asm/atomic_64.h | 42 ++++++++++++++++++++++++++++++++--------
 2 files changed, 47 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index b551bb1ae3cf..aa045deb2e75 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -31,7 +31,10 @@ static inline int atomic_read(const atomic_t *v)
  *
  * Atomically sets the value of @v to @i.
  */
-#define atomic_set(v, i)	(((v)->counter) = (i))
+static inline void atomic_set(atomic_t *v, int i)
+{
+	v->counter = i;
+}
 
 /**
  * atomic_add - add integer to atomic variable
@@ -203,8 +206,15 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 	return atomic_add_return(-i, v);
 }
 
-#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new)))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), (new)))
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	return cmpxchg(&v->counter, old, new);
+}
+
+static inline int atomic_xchg(atomic_t *v, int new)
+{
+	return xchg(&v->counter, new);
+}
 
 /**
  * atomic_add_unless - add unless the number is already a given value
diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h
index 0d6360220007..d605dc268e79 100644
--- a/arch/x86/include/asm/atomic_64.h
+++ b/arch/x86/include/asm/atomic_64.h
@@ -18,7 +18,10 @@
  *
  * Atomically reads the value of @v.
  */
-#define atomic_read(v)		((v)->counter)
+static inline int atomic_read(const atomic_t *v)
+{
+	return v->counter;
+}
 
 /**
  * atomic_set - set atomic variable
@@ -27,7 +30,10 @@
  *
  * Atomically sets the value of @v to @i.
  */
-#define atomic_set(v, i)		(((v)->counter) = (i))
+static inline void atomic_set(atomic_t *v, int i)
+{
+	v->counter = i;
+}
 
 /**
  * atomic_add - add integer to atomic variable
@@ -192,7 +198,10 @@ static inline int atomic_sub_return(int i, atomic_t *v)
  * Atomically reads the value of @v.
  * Doesn't imply a read memory barrier.
  */
-#define atomic64_read(v)		((v)->counter)
+static inline long atomic64_read(const atomic64_t *v)
+{
+	return v->counter;
+}
 
 /**
  * atomic64_set - set atomic64 variable
@@ -201,7 +210,10 @@ static inline int atomic_sub_return(int i, atomic_t *v)
  *
  * Atomically sets the value of @v to @i.
  */
-#define atomic64_set(v, i)		(((v)->counter) = (i))
+static inline void atomic64_set(atomic64_t *v, long i)
+{
+	v->counter = i;
+}
 
 /**
  * atomic64_add - add integer to atomic64 variable
@@ -355,11 +367,25 @@ static inline long atomic64_sub_return(long i, atomic64_t *v)
 #define atomic64_inc_return(v)  (atomic64_add_return(1, (v)))
 #define atomic64_dec_return(v)  (atomic64_sub_return(1, (v)))
 
-#define atomic64_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new)))
-#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
+{
+	return cmpxchg(&v->counter, old, new);
+}
+
+static inline long atomic64_xchg(atomic64_t *v, long new)
+{
+	return xchg(&v->counter, new);
+}
 
-#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new)))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), (new)))
+static inline long atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	return cmpxchg(&v->counter, old, new);
+}
+
+static inline long atomic_xchg(atomic_t *v, int new)
+{
+	return xchg(&v->counter, new);
+}
 
 /**
  * atomic_add_unless - add unless the number is a given value
-- 
cgit v1.2.3


From 67d7178f8fc64b7f68d7dd8a1b21dfa0d42c220c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 3 Jul 2009 13:23:02 +0200
Subject: x86: atomic64: Improve atomic64_read()

Optimize atomic64_read() as a special open-coded
cmpxchg8b variant. This generates nicer code:

arch/x86/lib/atomic64_32.o:

   text	   data	    bss	    dec	    hex	filename
    435	      0	      0	    435	    1b3	atomic64_32.o.before
    431	      0	      0	    431	    1af	atomic64_32.o.after

md5:
   bd8ab95e69c93518578bfaf0ea3be4d9  atomic64_32.o.before.asm
   2bdfd4bd1f6b7b61b7fc127aef90ce3b  atomic64_32.o.after.asm

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
LKML-Reference: <alpine.LFD.2.01.0907021653030.3210@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/atomic64_32.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c
index fd28fd3fb742..cd11803f9448 100644
--- a/arch/x86/lib/atomic64_32.c
+++ b/arch/x86/lib/atomic64_32.c
@@ -62,9 +62,17 @@ void atomic64_set(atomic64_t *ptr, u64 new_val)
  */
 u64 atomic64_read(atomic64_t *ptr)
 {
-	u64 old = 1LL << 32;
+	u64 res;
 
-	return cmpxchg8b(&ptr->counter, old, old);
+	asm volatile(
+		"mov %%ebx, %%eax\n\t"
+		"mov %%ecx, %%edx\n\t"
+		LOCK_PREFIX "cmpxchg8b %1\n"
+			: "+A" (res)
+			: "m" (*ptr)
+		);
+
+	return res;
 }
 
 /**
-- 
cgit v1.2.3


From 1fde902d52ee13ab9fab155bbae757fdf7daf0c1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 17:28:57 +0200
Subject: x86: atomic64: Export APIs to modules

atomic64_t primitives are used by a handful of drivers,
so export the APIs consistently. These were inlined
before.

Also mark atomic64_32.o as a core object, so that the symbols
are available even if not linked to core kernel pieces.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
LKML-Reference: <tip-05118ab8859492ac9ddda0154cf90e37b0a4a0b0@git.kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/Makefile      |  2 +-
 arch/x86/lib/atomic64_32.c | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index c3c657c8bb83..07c31899c9c2 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -10,7 +10,7 @@ lib-y += usercopy_$(BITS).o getuser.o putuser.o
 lib-y += memcpy_$(BITS).o
 
 ifeq ($(CONFIG_X86_32),y)
-        lib-y += atomic64_32.o
+        obj-y += atomic64_32.o
         lib-y += checksum_32.o
         lib-y += strstr_32.o
         lib-y += semaphore_32.o string_32.o
diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c
index cd11803f9448..6722a092e407 100644
--- a/arch/x86/lib/atomic64_32.c
+++ b/arch/x86/lib/atomic64_32.c
@@ -1,5 +1,7 @@
 #include <linux/compiler.h>
+#include <linux/module.h>
 #include <linux/types.h>
+
 #include <asm/processor.h>
 #include <asm/cmpxchg.h>
 #include <asm/atomic.h>
@@ -21,6 +23,7 @@ u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val)
 {
 	return cmpxchg8b(&ptr->counter, old_val, new_val);
 }
+EXPORT_SYMBOL(atomic64_cmpxchg);
 
 /**
  * atomic64_xchg - xchg atomic64 variable
@@ -41,6 +44,7 @@ u64 atomic64_xchg(atomic64_t *ptr, u64 new_val)
 
 	return old_val;
 }
+EXPORT_SYMBOL(atomic64_xchg);
 
 /**
  * atomic64_set - set atomic64 variable
@@ -53,6 +57,7 @@ void atomic64_set(atomic64_t *ptr, u64 new_val)
 {
 	atomic64_xchg(ptr, new_val);
 }
+EXPORT_SYMBOL(atomic64_read);
 
 /**
  * atomic64_read - read atomic64 variable
@@ -74,6 +79,7 @@ u64 atomic64_read(atomic64_t *ptr)
 
 	return res;
 }
+EXPORT_SYMBOL(atomic64_read);
 
 /**
  * atomic64_add_return - add and return
@@ -103,21 +109,25 @@ noinline u64 atomic64_add_return(u64 delta, atomic64_t *ptr)
 
 	return new_val;
 }
+EXPORT_SYMBOL(atomic64_add_return);
 
 u64 atomic64_sub_return(u64 delta, atomic64_t *ptr)
 {
 	return atomic64_add_return(-delta, ptr);
 }
+EXPORT_SYMBOL(atomic64_sub_return);
 
 u64 atomic64_inc_return(atomic64_t *ptr)
 {
 	return atomic64_add_return(1, ptr);
 }
+EXPORT_SYMBOL(atomic64_inc_return);
 
 u64 atomic64_dec_return(atomic64_t *ptr)
 {
 	return atomic64_sub_return(1, ptr);
 }
+EXPORT_SYMBOL(atomic64_dec_return);
 
 /**
  * atomic64_add - add integer to atomic64 variable
@@ -130,6 +140,7 @@ void atomic64_add(u64 delta, atomic64_t *ptr)
 {
 	atomic64_add_return(delta, ptr);
 }
+EXPORT_SYMBOL(atomic64_add);
 
 /**
  * atomic64_sub - subtract the atomic64 variable
@@ -142,6 +153,7 @@ void atomic64_sub(u64 delta, atomic64_t *ptr)
 {
 	atomic64_add(-delta, ptr);
 }
+EXPORT_SYMBOL(atomic64_sub);
 
 /**
  * atomic64_sub_and_test - subtract value from variable and test result
@@ -158,6 +170,7 @@ int atomic64_sub_and_test(u64 delta, atomic64_t *ptr)
 
 	return old_val == 0;
 }
+EXPORT_SYMBOL(atomic64_sub_and_test);
 
 /**
  * atomic64_inc - increment atomic64 variable
@@ -169,6 +182,7 @@ void atomic64_inc(atomic64_t *ptr)
 {
 	atomic64_add(1, ptr);
 }
+EXPORT_SYMBOL(atomic64_inc);
 
 /**
  * atomic64_dec - decrement atomic64 variable
@@ -180,6 +194,7 @@ void atomic64_dec(atomic64_t *ptr)
 {
 	atomic64_sub(1, ptr);
 }
+EXPORT_SYMBOL(atomic64_dec);
 
 /**
  * atomic64_dec_and_test - decrement and test
@@ -193,6 +208,7 @@ int atomic64_dec_and_test(atomic64_t *ptr)
 {
 	return atomic64_sub_and_test(1, ptr);
 }
+EXPORT_SYMBOL(atomic64_dec_and_test);
 
 /**
  * atomic64_inc_and_test - increment and test
@@ -206,6 +222,7 @@ int atomic64_inc_and_test(atomic64_t *ptr)
 {
 	return atomic64_sub_and_test(-1, ptr);
 }
+EXPORT_SYMBOL(atomic64_inc_and_test);
 
 /**
  * atomic64_add_negative - add and test if negative
@@ -222,3 +239,4 @@ int atomic64_add_negative(u64 delta, atomic64_t *ptr)
 
 	return old_val < 0;
 }
+EXPORT_SYMBOL(atomic64_add_negative);
-- 
cgit v1.2.3


From 3a8d1788b37435baf6c296f4ea8beb4fa4955f44 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 19:56:36 +0200
Subject: x86: atomic64: Improve atomic64_xchg()

Remove the read-first logic from atomic64_xchg() and simplify
the loop.

atomic64_xchg() was the last user of __atomic64_read(), so
remove that helper as well.

Also, change the 'real_val' assumption from the somewhat quirky
1ULL << 32 value to the (just as arbitrary, but simpler) value
of 0.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
LKML-Reference: <tip-05118ab8859492ac9ddda0154cf90e37b0a4a0b0@git.kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/atomic_32.h |  9 ---------
 arch/x86/lib/atomic64_32.c       | 21 +++++++++++++++------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index aa045deb2e75..d7c8849b8c67 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -268,15 +268,6 @@ typedef struct {
 
 #define ATOMIC64_INIT(val)	{ (val) }
 
-/**
- * atomic64_read - read atomic64 variable
- * @ptr: pointer of type atomic64_t
- *
- * Atomically reads the value of @v.
- * Doesn't imply a read memory barrier.
- */
-#define __atomic64_read(ptr)		((ptr)->counter)
-
 extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val);
 
 /**
diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c
index 6722a092e407..a804f96e90e2 100644
--- a/arch/x86/lib/atomic64_32.c
+++ b/arch/x86/lib/atomic64_32.c
@@ -33,14 +33,23 @@ EXPORT_SYMBOL(atomic64_cmpxchg);
  * Atomically xchgs the value of @ptr to @new_val and returns
  * the old value.
  */
-
 u64 atomic64_xchg(atomic64_t *ptr, u64 new_val)
 {
-	u64 old_val;
+	/*
+	 * Try first with a (possibly incorrect) assumption about
+	 * what we have there. We'll do two loops most likely,
+	 * but we'll get an ownership MESI transaction straight away
+	 * instead of a read transaction followed by a
+	 * flush-for-ownership transaction:
+	 */
+	u64 old_val, real_val = 0;
 
 	do {
-		old_val = __atomic64_read(ptr);
-	} while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
+		old_val = real_val;
+
+		real_val = atomic64_cmpxchg(ptr, old_val, new_val);
+
+	} while (real_val != old_val);
 
 	return old_val;
 }
@@ -91,13 +100,13 @@ EXPORT_SYMBOL(atomic64_read);
 noinline u64 atomic64_add_return(u64 delta, atomic64_t *ptr)
 {
 	/*
-	 * Try first with a (probably incorrect) assumption about
+	 * Try first with a (possibly incorrect) assumption about
 	 * what we have there. We'll do two loops most likely,
 	 * but we'll get an ownership MESI transaction straight away
 	 * instead of a read transaction followed by a
 	 * flush-for-ownership transaction:
 	 */
-	u64 old_val, new_val, real_val = 1ULL << 32;
+	u64 old_val, new_val, real_val = 0;
 
 	do {
 		old_val = real_val;
-- 
cgit v1.2.3


From ddf9a003d32f720805ac30bcc15755e9289073de Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 20:11:30 +0200
Subject: x86: atomic64: Clean up atomic64_sub_and_test() and
 atomic64_add_negative()

Linus noticed that the variable 'old_val' is confusingly
named in these functions - the correct name is 'new_val'.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
LKML-Reference: <alpine.LFD.2.01.0907030942260.3210@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/atomic64_32.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c
index a804f96e90e2..1d98c9eb6eac 100644
--- a/arch/x86/lib/atomic64_32.c
+++ b/arch/x86/lib/atomic64_32.c
@@ -175,9 +175,9 @@ EXPORT_SYMBOL(atomic64_sub);
  */
 int atomic64_sub_and_test(u64 delta, atomic64_t *ptr)
 {
-	u64 old_val = atomic64_sub_return(delta, ptr);
+	u64 new_val = atomic64_sub_return(delta, ptr);
 
-	return old_val == 0;
+	return new_val == 0;
 }
 EXPORT_SYMBOL(atomic64_sub_and_test);
 
@@ -244,8 +244,8 @@ EXPORT_SYMBOL(atomic64_inc_and_test);
  */
 int atomic64_add_negative(u64 delta, atomic64_t *ptr)
 {
-	long long old_val = atomic64_add_return(delta, ptr);
+	s64 new_val = atomic64_add_return(delta, ptr);
 
-	return old_val < 0;
+	return new_val < 0;
 }
 EXPORT_SYMBOL(atomic64_add_negative);
-- 
cgit v1.2.3


From a79f0da80a508448434476b77f9d3d1a469eab67 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 3 Jul 2009 16:50:10 +0200
Subject: x86: atomic64: Inline atomic64_read() again

Now that atomic64_read() is lightweight (no register pressure
and a small icache footprint), we can inline it again.

Also use the "=&A" constraint instead of "+A" to avoid a
warning about the uninitialized 'res' variable. (gcc had to
force 0 into eax/edx otherwise.)

  $ size vmlinux.prev vmlinux.after
     text    data     bss     dec     hex filename
  4908667  451676 1684868 7045211  6b805b vmlinux.prev
  4908651  451676 1684868 7045195  6b804b vmlinux.after

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
LKML-Reference: <4A4E1AA2.30002@gmail.com>
[ Also fix typo in atomic64_set() export ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/atomic_32.h | 22 ++++++++++++++++++++++
 arch/x86/lib/atomic64_32.c       | 23 +----------------------
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index d7c8849b8c67..dc5a667ff791 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -295,6 +295,28 @@ extern void atomic64_set(atomic64_t *ptr, u64 new_val);
  *
  * Atomically reads the value of @ptr and returns it.
  */
+static inline u64 atomic64_read(atomic64_t *ptr)
+{
+	u64 res;
+
+	/*
+	 * Note, we inline this atomic64_t primitive because
+	 * it only clobbers EAX/EDX and leaves the others
+	 * untouched. We also (somewhat subtly) rely on the
+	 * fact that cmpxchg8b returns the current 64-bit value
+	 * of the memory location we are touching:
+	 */
+	asm volatile(
+		"mov %%ebx, %%eax\n\t"
+		"mov %%ecx, %%edx\n\t"
+		LOCK_PREFIX "cmpxchg8b %1\n"
+			: "=&A" (res)
+			: "m" (*ptr)
+		);
+
+	return res;
+}
+
 extern u64 atomic64_read(atomic64_t *ptr);
 
 /**
diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c
index 1d98c9eb6eac..824fa0be55a3 100644
--- a/arch/x86/lib/atomic64_32.c
+++ b/arch/x86/lib/atomic64_32.c
@@ -66,31 +66,10 @@ void atomic64_set(atomic64_t *ptr, u64 new_val)
 {
 	atomic64_xchg(ptr, new_val);
 }
-EXPORT_SYMBOL(atomic64_read);
+EXPORT_SYMBOL(atomic64_set);
 
 /**
- * atomic64_read - read atomic64 variable
- * @ptr:      pointer to type atomic64_t
- *
- * Atomically reads the value of @ptr and returns it.
- */
-u64 atomic64_read(atomic64_t *ptr)
-{
-	u64 res;
-
-	asm volatile(
-		"mov %%ebx, %%eax\n\t"
-		"mov %%ecx, %%edx\n\t"
-		LOCK_PREFIX "cmpxchg8b %1\n"
-			: "+A" (res)
-			: "m" (*ptr)
-		);
-
-	return res;
-}
 EXPORT_SYMBOL(atomic64_read);
-
-/**
  * atomic64_add_return - add and return
  * @delta: integer value to add
  * @ptr:   pointer to type atomic64_t
-- 
cgit v1.2.3