From ff18143ceed3424b7d6cdb8659b9692fa734f0d8 Mon Sep 17 00:00:00 2001
From: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
Date: Wed, 20 Apr 2016 14:41:00 +0800
Subject: sh: cmpxchg: fix a bit shift bug in big_endian os

Correct bitoff in big endian OS.
Current code works correctly for 1 byte but not for 2 bytes.

Fixes: 3226aad81aa6 ("sh: support 1 and 2 byte xchg")
Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rich Felker <dalias@libc.org>
---
 arch/sh/include/asm/cmpxchg-xchg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/sh/include')

diff --git a/arch/sh/include/asm/cmpxchg-xchg.h b/arch/sh/include/asm/cmpxchg-xchg.h
index 7219719c23a3..1e881f5db659 100644
--- a/arch/sh/include/asm/cmpxchg-xchg.h
+++ b/arch/sh/include/asm/cmpxchg-xchg.h
@@ -21,7 +21,7 @@ static inline u32 __xchg_cmpxchg(volatile void *ptr, u32 x, int size)
 	int off = (unsigned long)ptr % sizeof(u32);
 	volatile u32 *p = ptr - off;
 #ifdef __BIG_ENDIAN
-	int bitoff = (sizeof(u32) - 1 - off) * BITS_PER_BYTE;
+	int bitoff = (sizeof(u32) - size - off) * BITS_PER_BYTE;
 #else
 	int bitoff = off * BITS_PER_BYTE;
 #endif
-- 
cgit v1.2.3


From bbe6c77857c38f4acbdc4fc70399515226d1859a Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias@libc.org>
Date: Fri, 1 Apr 2016 20:10:19 +0000
Subject: sh: make sigcontext definition consistent across fpu/nofpu models

Up until now, the SH version of the sigcontext structure, and thus
mcontext_t/ucontext_t, varied depending on the cpu model the kernel
was built to run on. SH-4 (including SH-4A) and SH-2A used the form
with space for FPU registers, and everything else used a form that
omitted them.

From a userspace perspective, however, the structure layout must be
fixed for a given ABI. Traditionally glibc and uClibc used the form
with space for FPU registers only when __SH4__ (which implies FPU;
__SH4_NOFPU__ is the predefined macro for SH-4 but with no-FPU ABI)
was defined. As a result:

- SH-4 no-FPU programs never matched kernel sigcontext.

- SH-3 programs did not match kernel sigcontext if run on SH-4,
  despite an apparent intent that they be compatible.

- SH-2 and SH-2A programs (using uClibc) did not match kernel
  sigcontext if run on SH-2A.

The mismatch might seem inconsequential because it occurs at the end
of the sigcontext structure, but sigcontext is embedded as uc_mcontext
in ucontext_t, where it is followed by uc_sigmask, an important member
for signal handlers to have access to. In particular, access to
uc_sigmask is necessary for a correct implementation of thread
cancellation.

It would be possible to retain support for both sigcontext ABIs via a
personality mechanism, but since many configurations were already
broken and nobody noticed, and since there are very few if any users
of legacy no-FPU models anymore, I have opted to just remove the
variation and always include space for the FPU registers in
sigcontext. This was proposed and discussed on a thread "SH sigcontext
ABI is broken" cross-posted to linux-sh, libc-alpha, and musl libc
lists in June 2015, and no objections were raised.

Signed-off-by: Rich Felker <dalias@libc.org>
---
 arch/sh/include/uapi/asm/sigcontext.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/sh/include')

diff --git a/arch/sh/include/uapi/asm/sigcontext.h b/arch/sh/include/uapi/asm/sigcontext.h
index 8ce1435bc0bf..faa5d0833412 100644
--- a/arch/sh/include/uapi/asm/sigcontext.h
+++ b/arch/sh/include/uapi/asm/sigcontext.h
@@ -25,8 +25,6 @@ struct sigcontext {
 	unsigned long sc_mach;
 	unsigned long sc_macl;
 
-#if defined(__SH4__) || defined(CONFIG_CPU_SH4) || \
-    defined(__SH2A__) || defined(CONFIG_CPU_SH2A)
 	/* FPU registers */
 	unsigned long sc_fpregs[16];
 	unsigned long sc_xfpregs[16];
@@ -34,7 +32,6 @@ struct sigcontext {
 	unsigned int sc_fpul;
 	unsigned int sc_ownedfp;
 #endif
-#endif
 };
 
 #endif /* __ASM_SH_SIGCONTEXT_H */
-- 
cgit v1.2.3


From 5a846abad07f6f30adfa3e46c5c7a47d2e7b1e63 Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias@libc.org>
Date: Thu, 17 Mar 2016 23:09:37 +0000
Subject: sh: add support for J-Core J2 processor

At the CPU/ISA level, the J2 is compatible with SH-2, and thus the
changes to add J2 support build on existing SH-2 support. However, J2
does not duplicate the memory-mapped SH-2 features like the cache
interface. Instead, the cache interfaces is described in the device
tree, and new code is added to be able to access the flat device tree
at early boot before it is unflattened.

Support is also added for receiving interrupts on trap numbers in the
range 16 to 31, since the J-Core aic1 interrupt controller generates
these traps. This range was unused but nominally for hardware
exceptions on SH-2, and a few values in this range were used for
exceptions on SH-2A, but SH-2A has its own version of the relevant
code.

No individual cpu subtypes are added for J2 since the intent moving
forward is to represent SoCs with device tree rather than as
hard-coded subtypes in the kernel. The CPU_SUBTYPE_J2 Kconfig item
exists only to fit into the existing cpu selection mechanism until it
is overhauled.

Signed-off-by: Rich Felker <dalias@libc.org>
---
 arch/sh/Kconfig                 | 10 +++++++
 arch/sh/Makefile                |  1 +
 arch/sh/include/asm/processor.h |  2 +-
 arch/sh/kernel/cpu/init.c       |  2 +-
 arch/sh/kernel/cpu/proc.c       |  1 +
 arch/sh/kernel/cpu/sh2/entry.S  |  5 ++++
 arch/sh/kernel/cpu/sh2/probe.c  | 37 ++++++++++++++++++++++-
 arch/sh/mm/Makefile             |  3 +-
 arch/sh/mm/cache-j2.c           | 65 +++++++++++++++++++++++++++++++++++++++++
 arch/sh/mm/cache.c              |  6 +++-
 10 files changed, 127 insertions(+), 5 deletions(-)
 create mode 100644 arch/sh/mm/cache-j2.c

(limited to 'arch/sh/include')

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index df6339d7bf72..2ef6f652bc50 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -186,6 +186,12 @@ config CPU_SH2A
 	select CPU_SH2
 	select UNCACHED_MAPPING
 
+config CPU_J2
+	bool
+	select CPU_SH2
+	select OF
+	select OF_EARLY_FLATTREE
+
 config CPU_SH3
 	bool
 	select CPU_HAS_INTEVT
@@ -252,6 +258,10 @@ config CPU_SUBTYPE_SH7619
 	select CPU_SH2
 	select SYS_SUPPORTS_SH_CMT
 
+config CPU_SUBTYPE_J2
+	bool "Support J2 processor"
+	select CPU_J2
+
 # SH-2A Processor Support
 
 config CPU_SUBTYPE_SH7201
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index 3b2c8b4827d0..00476662ac2c 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -31,6 +31,7 @@ isa-y					:= $(isa-y)-up
 endif
 
 cflags-$(CONFIG_CPU_SH2)		:= $(call cc-option,-m2,)
+cflags-$(CONFIG_CPU_J2)			:= $(call cc-option,-mj2,)
 cflags-$(CONFIG_CPU_SH2A)		+= $(call cc-option,-m2a,) \
 					   $(call cc-option,-m2a-nofpu,) \
 					   $(call cc-option,-m4-nofpu,)
diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
index 1506897648aa..f9a09942a32d 100644
--- a/arch/sh/include/asm/processor.h
+++ b/arch/sh/include/asm/processor.h
@@ -15,7 +15,7 @@
  */
 enum cpu_type {
 	/* SH-2 types */
-	CPU_SH7619,
+	CPU_SH7619, CPU_J2,
 
 	/* SH-2A types */
 	CPU_SH7201, CPU_SH7203, CPU_SH7206, CPU_SH7263, CPU_SH7264, CPU_SH7269,
diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c
index bfd9e2798008..c8b3be1b54e6 100644
--- a/arch/sh/kernel/cpu/init.c
+++ b/arch/sh/kernel/cpu/init.c
@@ -106,7 +106,7 @@ void __attribute__ ((weak)) l2_cache_init(void)
 /*
  * Generic first-level cache init
  */
-#ifdef CONFIG_SUPERH32
+#if defined(CONFIG_SUPERH32) && !defined(CONFIG_CPU_J2)
 static void cache_init(void)
 {
 	unsigned long ccr, flags;
diff --git a/arch/sh/kernel/cpu/proc.c b/arch/sh/kernel/cpu/proc.c
index 9e6624c9108b..4df4b284f591 100644
--- a/arch/sh/kernel/cpu/proc.c
+++ b/arch/sh/kernel/cpu/proc.c
@@ -27,6 +27,7 @@ static const char *cpu_name[] = {
 	[CPU_MXG]	= "MX-G",	[CPU_SH7723]	= "SH7723",
 	[CPU_SH7366]	= "SH7366",	[CPU_SH7724]	= "SH7724",
 	[CPU_SH7372]	= "SH7372",	[CPU_SH7734]	= "SH7734",
+	[CPU_J2]	= "J2",
 	[CPU_SH_NONE]	= "Unknown"
 };
 
diff --git a/arch/sh/kernel/cpu/sh2/entry.S b/arch/sh/kernel/cpu/sh2/entry.S
index a1505956ef28..16bde0efaca3 100644
--- a/arch/sh/kernel/cpu/sh2/entry.S
+++ b/arch/sh/kernel/cpu/sh2/entry.S
@@ -147,6 +147,11 @@ ENTRY(exception_handler)
 	mov	#31,r8
 	cmp/hs	r8,r9
 	bt	trap_entry	! 64 > vec >= 31  is trap
+#ifdef CONFIG_CPU_J2
+	mov	#16,r8
+	cmp/hs	r8,r9
+	bt	interrupt_entry	! 31 > vec >= 16 is interrupt
+#endif
 
 	mov.l	4f,r8
 	mov	r9,r4
diff --git a/arch/sh/kernel/cpu/sh2/probe.c b/arch/sh/kernel/cpu/sh2/probe.c
index 6c687ae812ef..152184007964 100644
--- a/arch/sh/kernel/cpu/sh2/probe.c
+++ b/arch/sh/kernel/cpu/sh2/probe.c
@@ -10,10 +10,27 @@
  * for more details.
  */
 #include <linux/init.h>
+#include <linux/of_fdt.h>
+#include <linux/smp.h>
+#include <linux/io.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
 
-void cpu_probe(void)
+#if defined(CONFIG_CPU_J2)
+extern u32 __iomem *j2_ccr_base;
+static int __init scan_cache(unsigned long node, const char *uname,
+			     int depth, void *data)
+{
+	if (!of_flat_dt_is_compatible(node, "jcore,cache"))
+		return 0;
+
+	j2_ccr_base = (u32 __iomem *)of_flat_dt_translate_address(node);
+
+	return 1;
+}
+#endif
+
+void __ref cpu_probe(void)
 {
 #if defined(CONFIG_CPU_SUBTYPE_SH7619)
 	boot_cpu_data.type			= CPU_SH7619;
@@ -24,10 +41,28 @@ void cpu_probe(void)
 	boot_cpu_data.dcache.linesz		= L1_CACHE_BYTES;
 	boot_cpu_data.dcache.flags		= 0;
 #endif
+
+#if defined(CONFIG_CPU_J2)
+	unsigned cpu = hard_smp_processor_id();
+	if (cpu == 0) of_scan_flat_dt(scan_cache, NULL);
+	if (j2_ccr_base) __raw_writel(0x80000303, j2_ccr_base + 4*cpu);
+	if (cpu != 0) return;
+	boot_cpu_data.type			= CPU_J2;
+
+	/* These defaults are appropriate for the original/current
+	 * J2 cache. Once there is a proper framework for getting cache
+	 * info from device tree, we should switch to that. */
+	boot_cpu_data.dcache.ways		= 1;
+	boot_cpu_data.dcache.sets		= 256;
+	boot_cpu_data.dcache.entry_shift	= 5;
+	boot_cpu_data.dcache.linesz		= 32;
+	boot_cpu_data.dcache.flags		= 0;
+#else
 	/*
 	 * SH-2 doesn't have separate caches
 	 */
 	boot_cpu_data.dcache.flags |= SH_CACHE_COMBINED;
+#endif
 	boot_cpu_data.icache = boot_cpu_data.dcache;
 	boot_cpu_data.family = CPU_FAMILY_SH2;
 }
diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile
index cee6b9999d86..92c3bd96aee5 100644
--- a/arch/sh/mm/Makefile
+++ b/arch/sh/mm/Makefile
@@ -4,7 +4,8 @@
 
 obj-y			:= alignment.o cache.o init.o consistent.o mmap.o
 
-cacheops-$(CONFIG_CPU_SH2)		:= cache-sh2.o
+cacheops-$(CONFIG_CPU_J2)		:= cache-j2.o
+cacheops-$(CONFIG_CPU_SUBTYPE_SH7619)	:= cache-sh2.o
 cacheops-$(CONFIG_CPU_SH2A)		:= cache-sh2a.o
 cacheops-$(CONFIG_CPU_SH3)		:= cache-sh3.o
 cacheops-$(CONFIG_CPU_SH4)		:= cache-sh4.o flush-sh4.o
diff --git a/arch/sh/mm/cache-j2.c b/arch/sh/mm/cache-j2.c
new file mode 100644
index 000000000000..391698bcac5b
--- /dev/null
+++ b/arch/sh/mm/cache-j2.c
@@ -0,0 +1,65 @@
+/*
+ * arch/sh/mm/cache-j2.c
+ *
+ * Copyright (C) 2015-2016 Smart Energy Instruments, Inc.
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/cpumask.h>
+
+#include <asm/cache.h>
+#include <asm/addrspace.h>
+#include <asm/processor.h>
+#include <asm/cacheflush.h>
+#include <asm/io.h>
+
+#define ICACHE_ENABLE	0x1
+#define DCACHE_ENABLE	0x2
+#define CACHE_ENABLE	(ICACHE_ENABLE | DCACHE_ENABLE)
+#define ICACHE_FLUSH	0x100
+#define DCACHE_FLUSH	0x200
+#define CACHE_FLUSH	(ICACHE_FLUSH | DCACHE_FLUSH)
+
+u32 __iomem *j2_ccr_base;
+
+static void j2_flush_icache(void *args)
+{
+	unsigned cpu;
+	for_each_possible_cpu(cpu)
+		__raw_writel(CACHE_ENABLE | ICACHE_FLUSH, j2_ccr_base + cpu);
+}
+
+static void j2_flush_dcache(void *args)
+{
+	unsigned cpu;
+	for_each_possible_cpu(cpu)
+		__raw_writel(CACHE_ENABLE | DCACHE_FLUSH, j2_ccr_base + cpu);
+}
+
+static void j2_flush_both(void *args)
+{
+	unsigned cpu;
+	for_each_possible_cpu(cpu)
+		__raw_writel(CACHE_ENABLE | CACHE_FLUSH, j2_ccr_base + cpu);
+}
+
+void __init j2_cache_init(void)
+{
+	if (!j2_ccr_base)
+		return;
+
+	local_flush_cache_all = j2_flush_both;
+	local_flush_cache_mm = j2_flush_both;
+	local_flush_cache_dup_mm = j2_flush_both;
+	local_flush_cache_page = j2_flush_both;
+	local_flush_cache_range = j2_flush_both;
+	local_flush_dcache_page = j2_flush_dcache;
+	local_flush_icache_range = j2_flush_icache;
+	local_flush_icache_page = j2_flush_icache;
+	local_flush_cache_sigtramp = j2_flush_icache;
+
+	pr_info("Initial J2 CCR is %.8x\n", __raw_readl(j2_ccr_base));
+}
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index 776d664a40c5..70cc52f2fab8 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -309,7 +309,11 @@ void __init cpu_cache_init(void)
 	if (unlikely(cache_disabled))
 		goto skip;
 
-	if (boot_cpu_data.family == CPU_FAMILY_SH2) {
+	if (boot_cpu_data.type == CPU_J2) {
+		extern void __weak j2_cache_init(void);
+
+		j2_cache_init();
+	} else if (boot_cpu_data.family == CPU_FAMILY_SH2) {
 		extern void __weak sh2_cache_init(void);
 
 		sh2_cache_init();
-- 
cgit v1.2.3


From 834da197058bebcb343320dafb8b62d416d4038c Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias@libc.org>
Date: Fri, 22 Apr 2016 23:29:13 +0000
Subject: sh: add AT_HWCAP flag for J-Core cas.l instruction

The J-Core cpu has, as an ISA extension, an atomic compare-and-swap
instruction cas.l which applications need to use (instead the imask or
gusa atomic models, which are fundamentally limited to UP) for
synchronization in order to be compatible with SMP systems. Provide a
hwcap flag so that it's possible to do runtime selection and support
both.

Signed-off-by: Rich Felker <dalias@libc.org>
---
 arch/sh/include/uapi/asm/cpu-features.h | 1 +
 arch/sh/kernel/cpu/sh2/probe.c          | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'arch/sh/include')

diff --git a/arch/sh/include/uapi/asm/cpu-features.h b/arch/sh/include/uapi/asm/cpu-features.h
index 694abe490edb..2f1bc851042a 100644
--- a/arch/sh/include/uapi/asm/cpu-features.h
+++ b/arch/sh/include/uapi/asm/cpu-features.h
@@ -22,5 +22,6 @@
 #define CPU_HAS_L2_CACHE	0x0080	/* Secondary cache / URAM */
 #define CPU_HAS_OP32		0x0100	/* 32-bit instruction support */
 #define CPU_HAS_PTEAEX		0x0200	/* PTE ASID Extension support */
+#define CPU_HAS_CAS_L		0x0400	/* cas.l atomic compare-and-swap */
 
 #endif /* __ASM_SH_CPU_FEATURES_H */
diff --git a/arch/sh/kernel/cpu/sh2/probe.c b/arch/sh/kernel/cpu/sh2/probe.c
index 152184007964..4205f6d42b69 100644
--- a/arch/sh/kernel/cpu/sh2/probe.c
+++ b/arch/sh/kernel/cpu/sh2/probe.c
@@ -57,6 +57,8 @@ void __ref cpu_probe(void)
 	boot_cpu_data.dcache.entry_shift	= 5;
 	boot_cpu_data.dcache.linesz		= 32;
 	boot_cpu_data.dcache.flags		= 0;
+
+	boot_cpu_data.flags |= CPU_HAS_CAS_L;
 #else
 	/*
 	 * SH-2 doesn't have separate caches
-- 
cgit v1.2.3


From 2b47d54ed41c33baf5825185168b493317c5572f Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias@libc.org>
Date: Thu, 28 Jul 2016 19:21:10 +0000
Subject: sh: add J2 atomics using the cas.l instruction

Signed-off-by: Rich Felker <dalias@libc.org>
---
 arch/sh/include/asm/atomic.h        |   8 ++
 arch/sh/include/asm/barrier.h       |   5 +
 arch/sh/include/asm/bitops-cas.h    |  93 +++++++++++++++
 arch/sh/include/asm/bitops.h        |   2 +
 arch/sh/include/asm/cmpxchg-cas.h   |  24 ++++
 arch/sh/include/asm/cmpxchg.h       |   2 +
 arch/sh/include/asm/spinlock-cas.h  | 117 +++++++++++++++++++
 arch/sh/include/asm/spinlock-llsc.h | 224 ++++++++++++++++++++++++++++++++++++
 arch/sh/include/asm/spinlock.h      | 222 +----------------------------------
 9 files changed, 481 insertions(+), 216 deletions(-)
 create mode 100644 arch/sh/include/asm/bitops-cas.h
 create mode 100644 arch/sh/include/asm/cmpxchg-cas.h
 create mode 100644 arch/sh/include/asm/spinlock-cas.h
 create mode 100644 arch/sh/include/asm/spinlock-llsc.h

(limited to 'arch/sh/include')

diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index c399e1c55685..8a7bd80c8b33 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -1,6 +1,12 @@
 #ifndef __ASM_SH_ATOMIC_H
 #define __ASM_SH_ATOMIC_H
 
+#if defined(CONFIG_CPU_J2)
+
+#include <asm-generic/atomic.h>
+
+#else
+
 /*
  * Atomic operations that C can't guarantee us.  Useful for
  * resource counting etc..
@@ -63,4 +69,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
+#endif /* CONFIG_CPU_J2 */
+
 #endif /* __ASM_SH_ATOMIC_H */
diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h
index 8a84e05adb2e..3c30b6e166b6 100644
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -29,6 +29,11 @@
 #define wmb()		mb()
 #define ctrl_barrier()	__icbi(PAGE_OFFSET)
 #else
+#if defined(CONFIG_CPU_J2) && defined(CONFIG_SMP)
+#define __smp_mb()	do { int tmp = 0; __asm__ __volatile__ ("cas.l %0,%0,@%1" : "+r"(tmp) : "z"(&tmp) : "memory", "t"); } while(0)
+#define __smp_rmb()	__smp_mb()
+#define __smp_wmb()	__smp_mb()
+#endif
 #define ctrl_barrier()	__asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
 #endif
 
diff --git a/arch/sh/include/asm/bitops-cas.h b/arch/sh/include/asm/bitops-cas.h
new file mode 100644
index 000000000000..88f793c04d3c
--- /dev/null
+++ b/arch/sh/include/asm/bitops-cas.h
@@ -0,0 +1,93 @@
+#ifndef __ASM_SH_BITOPS_CAS_H
+#define __ASM_SH_BITOPS_CAS_H
+
+static inline unsigned __bo_cas(volatile unsigned *p, unsigned old, unsigned new)
+{
+	__asm__ __volatile__("cas.l %1,%0,@r0"
+		: "+r"(new)
+		: "r"(old), "z"(p)
+		: "t", "memory" );
+	return new;
+}
+
+static inline void set_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old|mask) != old);
+}
+
+static inline void clear_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old&~mask) != old);
+}
+
+static inline void change_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old^mask) != old);
+}
+
+static inline int test_and_set_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old|mask) != old);
+
+	return !!(old & mask);
+}
+
+static inline int test_and_clear_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old&~mask) != old);
+
+	return !!(old & mask);
+}
+
+static inline int test_and_change_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old^mask) != old);
+
+	return !!(old & mask);
+}
+
+#include <asm-generic/bitops/non-atomic.h>
+
+#endif /* __ASM_SH_BITOPS_CAS_H */
diff --git a/arch/sh/include/asm/bitops.h b/arch/sh/include/asm/bitops.h
index fc8e652cf173..a8699d60a8c4 100644
--- a/arch/sh/include/asm/bitops.h
+++ b/arch/sh/include/asm/bitops.h
@@ -18,6 +18,8 @@
 #include <asm/bitops-op32.h>
 #elif defined(CONFIG_CPU_SH4A)
 #include <asm/bitops-llsc.h>
+#elif defined(CONFIG_CPU_J2) && defined(CONFIG_SMP)
+#include <asm/bitops-cas.h>
 #else
 #include <asm-generic/bitops/atomic.h>
 #include <asm-generic/bitops/non-atomic.h>
diff --git a/arch/sh/include/asm/cmpxchg-cas.h b/arch/sh/include/asm/cmpxchg-cas.h
new file mode 100644
index 000000000000..d0d86649e8c1
--- /dev/null
+++ b/arch/sh/include/asm/cmpxchg-cas.h
@@ -0,0 +1,24 @@
+#ifndef __ASM_SH_CMPXCHG_CAS_H
+#define __ASM_SH_CMPXCHG_CAS_H
+
+static inline unsigned long
+__cmpxchg_u32(volatile u32 *m, unsigned long old, unsigned long new)
+{
+	__asm__ __volatile__("cas.l %1,%0,@r0"
+		: "+r"(new)
+		: "r"(old), "z"(m)
+		: "t", "memory" );
+	return new;
+}
+
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+	unsigned long old;
+	do old = *m;
+	while (__cmpxchg_u32(m, old, val) != old);
+	return old;
+}
+
+#include <asm/cmpxchg-xchg.h>
+
+#endif /* __ASM_SH_CMPXCHG_CAS_H */
diff --git a/arch/sh/include/asm/cmpxchg.h b/arch/sh/include/asm/cmpxchg.h
index 5225916c1057..3dfe0467a773 100644
--- a/arch/sh/include/asm/cmpxchg.h
+++ b/arch/sh/include/asm/cmpxchg.h
@@ -13,6 +13,8 @@
 #include <asm/cmpxchg-grb.h>
 #elif defined(CONFIG_CPU_SH4A)
 #include <asm/cmpxchg-llsc.h>
+#elif defined(CONFIG_CPU_J2) && defined(CONFIG_SMP)
+#include <asm/cmpxchg-cas.h>
 #else
 #include <asm/cmpxchg-irq.h>
 #endif
diff --git a/arch/sh/include/asm/spinlock-cas.h b/arch/sh/include/asm/spinlock-cas.h
new file mode 100644
index 000000000000..c46e8cc7b515
--- /dev/null
+++ b/arch/sh/include/asm/spinlock-cas.h
@@ -0,0 +1,117 @@
+/*
+ * include/asm-sh/spinlock-cas.h
+ *
+ * Copyright (C) 2015 SEI
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef __ASM_SH_SPINLOCK_CAS_H
+#define __ASM_SH_SPINLOCK_CAS_H
+
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
+static inline unsigned __sl_cas(volatile unsigned *p, unsigned old, unsigned new)
+{
+	__asm__ __volatile__("cas.l %1,%0,@r0"
+		: "+r"(new)
+		: "r"(old), "z"(p)
+		: "t", "memory" );
+	return new;
+}
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+#define arch_spin_is_locked(x)		((x)->lock <= 0)
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->lock, VAL > 0);
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	while (!__sl_cas(&lock->lock, 1, 0));
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	__sl_cas(&lock->lock, 0, 1);
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	return __sl_cas(&lock->lock, 1, 0);
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts but no interrupt
+ * writers. For those circumstances we can "mix" irq-safe locks - any writer
+ * needs to get a irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+
+/**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define arch_read_can_lock(x)	((x)->lock > 0)
+
+/**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define arch_write_can_lock(x)	((x)->lock == RW_LOCK_BIAS)
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	unsigned old;
+	do old = rw->lock;
+	while (!old || __sl_cas(&rw->lock, old, old-1) != old);
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	unsigned old;
+	do old = rw->lock;
+	while (__sl_cas(&rw->lock, old, old+1) != old);
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	while (__sl_cas(&rw->lock, RW_LOCK_BIAS, 0) != RW_LOCK_BIAS);
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	__sl_cas(&rw->lock, 0, RW_LOCK_BIAS);
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	unsigned old;
+	do old = rw->lock;
+	while (old && __sl_cas(&rw->lock, old, old-1) != old);
+	return !!old;
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	return __sl_cas(&rw->lock, RW_LOCK_BIAS, 0) == RW_LOCK_BIAS;
+}
+
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
+
+#endif /* __ASM_SH_SPINLOCK_CAS_H */
diff --git a/arch/sh/include/asm/spinlock-llsc.h b/arch/sh/include/asm/spinlock-llsc.h
new file mode 100644
index 000000000000..cec78143fa83
--- /dev/null
+++ b/arch/sh/include/asm/spinlock-llsc.h
@@ -0,0 +1,224 @@
+/*
+ * include/asm-sh/spinlock-llsc.h
+ *
+ * Copyright (C) 2002, 2003 Paul Mundt
+ * Copyright (C) 2006, 2007 Akio Idehara
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef __ASM_SH_SPINLOCK_LLSC_H
+#define __ASM_SH_SPINLOCK_LLSC_H
+
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+#define arch_spin_is_locked(x)		((x)->lock <= 0)
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->lock, VAL > 0);
+}
+
+/*
+ * Simple spin lock operations.  There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions.  They have a cost.
+ */
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	unsigned long tmp;
+	unsigned long oldval;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! arch_spin_lock	\n\t"
+		"mov		%0, %1				\n\t"
+		"mov		#0, %0				\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"cmp/pl		%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&lock->lock)
+		: "t", "memory"
+	);
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"mov		#1, %0 ! arch_spin_unlock	\n\t"
+		"mov.l		%0, @%1				\n\t"
+		: "=&z" (tmp)
+		: "r" (&lock->lock)
+		: "t", "memory"
+	);
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	unsigned long tmp, oldval;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! arch_spin_trylock	\n\t"
+		"mov		%0, %1				\n\t"
+		"mov		#0, %0				\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&lock->lock)
+		: "t", "memory"
+	);
+
+	return oldval;
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts but no interrupt
+ * writers. For those circumstances we can "mix" irq-safe locks - any writer
+ * needs to get a irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+
+/**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define arch_read_can_lock(x)	((x)->lock > 0)
+
+/**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define arch_write_can_lock(x)	((x)->lock == RW_LOCK_BIAS)
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! arch_read_lock	\n\t"
+		"cmp/pl		%0				\n\t"
+		"bf		1b				\n\t"
+		"add		#-1, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp)
+		: "r" (&rw->lock)
+		: "t", "memory"
+	);
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! arch_read_unlock	\n\t"
+		"add		#1, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp)
+		: "r" (&rw->lock)
+		: "t", "memory"
+	);
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! arch_write_lock	\n\t"
+		"cmp/hs		%2, %0				\n\t"
+		"bf		1b				\n\t"
+		"sub		%2, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp)
+		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
+		: "t", "memory"
+	);
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	__asm__ __volatile__ (
+		"mov.l		%1, @%0 ! arch_write_unlock	\n\t"
+		:
+		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
+		: "t", "memory"
+	);
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	unsigned long tmp, oldval;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! arch_read_trylock	\n\t"
+		"mov		%0, %1				\n\t"
+		"cmp/pl		%0				\n\t"
+		"bf		2f				\n\t"
+		"add		#-1, %0				\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"2:						\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&rw->lock)
+		: "t", "memory"
+	);
+
+	return (oldval > 0);
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	unsigned long tmp, oldval;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! arch_write_trylock	\n\t"
+		"mov		%0, %1				\n\t"
+		"cmp/hs		%3, %0				\n\t"
+		"bf		2f				\n\t"
+		"sub		%3, %0				\n\t"
+		"2:						\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
+		: "t", "memory"
+	);
+
+	return (oldval > (RW_LOCK_BIAS - 1));
+}
+
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
+
+#endif /* __ASM_SH_SPINLOCK_LLSC_H */
diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h
index 416834b60ad0..c2c61ea6a8e2 100644
--- a/arch/sh/include/asm/spinlock.h
+++ b/arch/sh/include/asm/spinlock.h
@@ -11,222 +11,12 @@
 #ifndef __ASM_SH_SPINLOCK_H
 #define __ASM_SH_SPINLOCK_H
 
-/*
- * The only locking implemented here uses SH-4A opcodes. For others,
- * split this out as per atomic-*.h.
- */
-#ifndef CONFIG_CPU_SH4A
-#error "Need movli.l/movco.l for spinlocks"
+#if defined(CONFIG_CPU_SH4A)
+#include <asm/spinlock-llsc.h>
+#elif defined(CONFIG_CPU_J2)
+#include <asm/spinlock-cas.h>
+#else
+#error "The configured cpu type does not support spinlocks"
 #endif
 
-#include <asm/barrier.h>
-#include <asm/processor.h>
-
-/*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
- */
-
-#define arch_spin_is_locked(x)		((x)->lock <= 0)
-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, VAL > 0);
-}
-
-/*
- * Simple spin lock operations.  There are two variants, one clears IRQ's
- * on the local processor, one does not.
- *
- * We make no fairness assumptions.  They have a cost.
- */
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-	unsigned long tmp;
-	unsigned long oldval;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%2, %0	! arch_spin_lock	\n\t"
-		"mov		%0, %1				\n\t"
-		"mov		#0, %0				\n\t"
-		"movco.l	%0, @%2				\n\t"
-		"bf		1b				\n\t"
-		"cmp/pl		%1				\n\t"
-		"bf		1b				\n\t"
-		: "=&z" (tmp), "=&r" (oldval)
-		: "r" (&lock->lock)
-		: "t", "memory"
-	);
-}
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__ (
-		"mov		#1, %0 ! arch_spin_unlock	\n\t"
-		"mov.l		%0, @%1				\n\t"
-		: "=&z" (tmp)
-		: "r" (&lock->lock)
-		: "t", "memory"
-	);
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
-	unsigned long tmp, oldval;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%2, %0	! arch_spin_trylock	\n\t"
-		"mov		%0, %1				\n\t"
-		"mov		#0, %0				\n\t"
-		"movco.l	%0, @%2				\n\t"
-		"bf		1b				\n\t"
-		"synco						\n\t"
-		: "=&z" (tmp), "=&r" (oldval)
-		: "r" (&lock->lock)
-		: "t", "memory"
-	);
-
-	return oldval;
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts but no interrupt
- * writers. For those circumstances we can "mix" irq-safe locks - any writer
- * needs to get a irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-
-/**
- * read_can_lock - would read_trylock() succeed?
- * @lock: the rwlock in question.
- */
-#define arch_read_can_lock(x)	((x)->lock > 0)
-
-/**
- * write_can_lock - would write_trylock() succeed?
- * @lock: the rwlock in question.
- */
-#define arch_write_can_lock(x)	((x)->lock == RW_LOCK_BIAS)
-
-static inline void arch_read_lock(arch_rwlock_t *rw)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%1, %0	! arch_read_lock	\n\t"
-		"cmp/pl		%0				\n\t"
-		"bf		1b				\n\t"
-		"add		#-1, %0				\n\t"
-		"movco.l	%0, @%1				\n\t"
-		"bf		1b				\n\t"
-		: "=&z" (tmp)
-		: "r" (&rw->lock)
-		: "t", "memory"
-	);
-}
-
-static inline void arch_read_unlock(arch_rwlock_t *rw)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%1, %0	! arch_read_unlock	\n\t"
-		"add		#1, %0				\n\t"
-		"movco.l	%0, @%1				\n\t"
-		"bf		1b				\n\t"
-		: "=&z" (tmp)
-		: "r" (&rw->lock)
-		: "t", "memory"
-	);
-}
-
-static inline void arch_write_lock(arch_rwlock_t *rw)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%1, %0	! arch_write_lock	\n\t"
-		"cmp/hs		%2, %0				\n\t"
-		"bf		1b				\n\t"
-		"sub		%2, %0				\n\t"
-		"movco.l	%0, @%1				\n\t"
-		"bf		1b				\n\t"
-		: "=&z" (tmp)
-		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
-		: "t", "memory"
-	);
-}
-
-static inline void arch_write_unlock(arch_rwlock_t *rw)
-{
-	__asm__ __volatile__ (
-		"mov.l		%1, @%0 ! arch_write_unlock	\n\t"
-		:
-		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
-		: "t", "memory"
-	);
-}
-
-static inline int arch_read_trylock(arch_rwlock_t *rw)
-{
-	unsigned long tmp, oldval;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%2, %0	! arch_read_trylock	\n\t"
-		"mov		%0, %1				\n\t"
-		"cmp/pl		%0				\n\t"
-		"bf		2f				\n\t"
-		"add		#-1, %0				\n\t"
-		"movco.l	%0, @%2				\n\t"
-		"bf		1b				\n\t"
-		"2:						\n\t"
-		"synco						\n\t"
-		: "=&z" (tmp), "=&r" (oldval)
-		: "r" (&rw->lock)
-		: "t", "memory"
-	);
-
-	return (oldval > 0);
-}
-
-static inline int arch_write_trylock(arch_rwlock_t *rw)
-{
-	unsigned long tmp, oldval;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%2, %0	! arch_write_trylock	\n\t"
-		"mov		%0, %1				\n\t"
-		"cmp/hs		%3, %0				\n\t"
-		"bf		2f				\n\t"
-		"sub		%3, %0				\n\t"
-		"2:						\n\t"
-		"movco.l	%0, @%2				\n\t"
-		"bf		1b				\n\t"
-		"synco						\n\t"
-		: "=&z" (tmp), "=&r" (oldval)
-		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
-		: "t", "memory"
-	);
-
-	return (oldval > (RW_LOCK_BIAS - 1));
-}
-
-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* __ASM_SH_SPINLOCK_H */
-- 
cgit v1.2.3


From 00b73d8d1b7131da03aec73011a7286f566fe87f Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias@libc.org>
Date: Mon, 25 Apr 2016 21:58:03 +0000
Subject: sh: add working futex atomic ops on userspace addresses for smp

The version of futex.h in asm-generic should really be adapted to do
the same thing so that this hideous code does not have to be
duplicated per-arch.

Signed-off-by: Rich Felker <dalias@libc.org>
---
 arch/sh/include/asm/futex-cas.h  | 34 ++++++++++++++
 arch/sh/include/asm/futex-irq.h  | 86 -----------------------------------
 arch/sh/include/asm/futex-llsc.h | 41 +++++++++++++++++
 arch/sh/include/asm/futex.h      | 97 ++++++++++++++++++++++++----------------
 4 files changed, 134 insertions(+), 124 deletions(-)
 create mode 100644 arch/sh/include/asm/futex-cas.h
 create mode 100644 arch/sh/include/asm/futex-llsc.h

(limited to 'arch/sh/include')

diff --git a/arch/sh/include/asm/futex-cas.h b/arch/sh/include/asm/futex-cas.h
new file mode 100644
index 000000000000..267cb7a5f101
--- /dev/null
+++ b/arch/sh/include/asm/futex-cas.h
@@ -0,0 +1,34 @@
+#ifndef __ASM_SH_FUTEX_CAS_H
+#define __ASM_SH_FUTEX_CAS_H
+
+static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval,
+						   u32 __user *uaddr,
+						   u32 oldval, u32 newval)
+{
+	int err = 0;
+	__asm__ __volatile__(
+		"1:\n\t"
+		"cas.l	%2, %1, @r0\n"
+		"2:\n\t"
+#ifdef CONFIG_MMU
+		".section	.fixup,\"ax\"\n"
+		"3:\n\t"
+		"mov.l	4f, %0\n\t"
+		"jmp	@%0\n\t"
+		" mov	%3, %0\n\t"
+		".balign	4\n"
+		"4:	.long	2b\n\t"
+		".previous\n"
+		".section	__ex_table,\"a\"\n\t"
+		".long	1b, 3b\n\t"
+		".previous"
+#endif
+		:"+r" (err), "+r" (newval)
+		:"r" (oldval), "i" (-EFAULT), "z" (uaddr)
+		:"t", "memory");
+	if (err) return err;
+	*uval = newval;
+	return 0;
+}
+
+#endif /* __ASM_SH_FUTEX_CAS_H */
diff --git a/arch/sh/include/asm/futex-irq.h b/arch/sh/include/asm/futex-irq.h
index 63d33129ea23..ab01dbee0a82 100644
--- a/arch/sh/include/asm/futex-irq.h
+++ b/arch/sh/include/asm/futex-irq.h
@@ -1,92 +1,6 @@
 #ifndef __ASM_SH_FUTEX_IRQ_H
 #define __ASM_SH_FUTEX_IRQ_H
 
-
-static inline int atomic_futex_op_xchg_set(int oparg, u32 __user *uaddr,
-					   int *oldval)
-{
-	unsigned long flags;
-	int ret;
-
-	local_irq_save(flags);
-
-	ret = get_user(*oldval, uaddr);
-	if (!ret)
-		ret = put_user(oparg, uaddr);
-
-	local_irq_restore(flags);
-
-	return ret;
-}
-
-static inline int atomic_futex_op_xchg_add(int oparg, u32 __user *uaddr,
-					   int *oldval)
-{
-	unsigned long flags;
-	int ret;
-
-	local_irq_save(flags);
-
-	ret = get_user(*oldval, uaddr);
-	if (!ret)
-		ret = put_user(*oldval + oparg, uaddr);
-
-	local_irq_restore(flags);
-
-	return ret;
-}
-
-static inline int atomic_futex_op_xchg_or(int oparg, u32 __user *uaddr,
-					  int *oldval)
-{
-	unsigned long flags;
-	int ret;
-
-	local_irq_save(flags);
-
-	ret = get_user(*oldval, uaddr);
-	if (!ret)
-		ret = put_user(*oldval | oparg, uaddr);
-
-	local_irq_restore(flags);
-
-	return ret;
-}
-
-static inline int atomic_futex_op_xchg_and(int oparg, u32 __user *uaddr,
-					   int *oldval)
-{
-	unsigned long flags;
-	int ret;
-
-	local_irq_save(flags);
-
-	ret = get_user(*oldval, uaddr);
-	if (!ret)
-		ret = put_user(*oldval & oparg, uaddr);
-
-	local_irq_restore(flags);
-
-	return ret;
-}
-
-static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr,
-					   int *oldval)
-{
-	unsigned long flags;
-	int ret;
-
-	local_irq_save(flags);
-
-	ret = get_user(*oldval, uaddr);
-	if (!ret)
-		ret = put_user(*oldval ^ oparg, uaddr);
-
-	local_irq_restore(flags);
-
-	return ret;
-}
-
 static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval,
 						   u32 __user *uaddr,
 						   u32 oldval, u32 newval)
diff --git a/arch/sh/include/asm/futex-llsc.h b/arch/sh/include/asm/futex-llsc.h
new file mode 100644
index 000000000000..23591703bec0
--- /dev/null
+++ b/arch/sh/include/asm/futex-llsc.h
@@ -0,0 +1,41 @@
+#ifndef __ASM_SH_FUTEX_LLSC_H
+#define __ASM_SH_FUTEX_LLSC_H
+
+static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval,
+						   u32 __user *uaddr,
+						   u32 oldval, u32 newval)
+{
+	int err = 0;
+	__asm__ __volatile__(
+		"synco\n"
+		"1:\n\t"
+		"movli.l	@%2, r0\n\t"
+		"mov	r0, %1\n\t"
+		"cmp/eq	%1, %4\n\t"
+		"bf	2f\n\t"
+		"mov	%5, r0\n\t"
+		"movco.l	r0, @%2\n\t"
+		"bf	1b\n"
+		"2:\n\t"
+		"synco\n\t"
+#ifdef CONFIG_MMU
+		".section	.fixup,\"ax\"\n"
+		"3:\n\t"
+		"mov.l	4f, %0\n\t"
+		"jmp	@%0\n\t"
+		" mov	%3, %0\n\t"
+		".balign	4\n"
+		"4:	.long	2b\n\t"
+		".previous\n"
+		".section	__ex_table,\"a\"\n\t"
+		".long	1b, 3b\n\t"
+		".previous"
+#endif
+		:"+r" (err), "=&r" (*uval)
+		:"r" (uaddr), "i" (-EFAULT), "r" (oldval), "r" (newval)
+		:"t", "memory", "r0");
+	if (err) return err;
+	return 0;
+}
+
+#endif /* __ASM_SH_FUTEX_LLSC_H */
diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h
index 7be39a646fbd..d0078747d308 100644
--- a/arch/sh/include/asm/futex.h
+++ b/arch/sh/include/asm/futex.h
@@ -7,16 +7,34 @@
 #include <linux/uaccess.h>
 #include <asm/errno.h>
 
-/* XXX: UP variants, fix for SH-4A and SMP.. */
+#if !defined(CONFIG_SMP)
 #include <asm/futex-irq.h>
+#elif defined(CONFIG_CPU_J2)
+#include <asm/futex-cas.h>
+#elif defined(CONFIG_CPU_SH4A)
+#include <asm/futex-llsc.h>
+#else
+#error SMP not supported on this configuration.
+#endif
+
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+			      u32 oldval, u32 newval)
+{
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval);
+}
 
 static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 {
 	int op = (encoded_op >> 28) & 7;
 	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret;
+	u32 oparg = (encoded_op << 8) >> 20;
+	u32 cmparg = (encoded_op << 20) >> 20;
+	u32 oldval, newval, prev;
+	int ret;
 
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
@@ -26,26 +44,39 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 	pagefault_disable();
 
-	switch (op) {
-	case FUTEX_OP_SET:
-		ret = atomic_futex_op_xchg_set(oparg, uaddr, &oldval);
-		break;
-	case FUTEX_OP_ADD:
-		ret = atomic_futex_op_xchg_add(oparg, uaddr, &oldval);
-		break;
-	case FUTEX_OP_OR:
-		ret = atomic_futex_op_xchg_or(oparg, uaddr, &oldval);
-		break;
-	case FUTEX_OP_ANDN:
-		ret = atomic_futex_op_xchg_and(~oparg, uaddr, &oldval);
-		break;
-	case FUTEX_OP_XOR:
-		ret = atomic_futex_op_xchg_xor(oparg, uaddr, &oldval);
-		break;
-	default:
-		ret = -ENOSYS;
-		break;
-	}
+	do {
+		if (op == FUTEX_OP_SET)
+			ret = oldval = 0;
+		else
+			ret = get_user(oldval, uaddr);
+
+		if (ret) break;
+
+		switch (op) {
+		case FUTEX_OP_SET:
+			newval = oparg;
+			break;
+		case FUTEX_OP_ADD:
+			newval = oldval + oparg;
+			break;
+		case FUTEX_OP_OR:
+			newval = oldval | oparg;
+			break;
+		case FUTEX_OP_ANDN:
+			newval = oldval & ~oparg;
+			break;
+		case FUTEX_OP_XOR:
+			newval = oldval ^ oparg;
+			break;
+		default:
+			ret = -ENOSYS;
+			break;
+		}
+
+		if (ret) break;
+
+		ret = futex_atomic_cmpxchg_inatomic(&prev, uaddr, oldval, newval);
+	} while (!ret && prev != oldval);
 
 	pagefault_enable();
 
@@ -53,10 +84,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 		switch (cmp) {
 		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
 		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
+		case FUTEX_OP_CMP_LT: ret = ((int)oldval < (int)cmparg); break;
+		case FUTEX_OP_CMP_GE: ret = ((int)oldval >= (int)cmparg); break;
+		case FUTEX_OP_CMP_LE: ret = ((int)oldval <= (int)cmparg); break;
+		case FUTEX_OP_CMP_GT: ret = ((int)oldval > (int)cmparg); break;
 		default: ret = -ENOSYS;
 		}
 	}
@@ -64,15 +95,5 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 	return ret;
 }
 
-static inline int
-futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
-			      u32 oldval, u32 newval)
-{
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
-	return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval);
-}
-
 #endif /* __KERNEL__ */
 #endif /* __ASM_SH_FUTEX_H */
-- 
cgit v1.2.3


From 74bdaa611fa69368fb4032ad437af073d31116bd Mon Sep 17 00:00:00 2001
From: Yoshinori Sato <ysato@users.sourceforge.jp>
Date: Tue, 28 Jun 2016 15:49:46 +0900
Subject: sh: system call wire up

Signed-off-by: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Rich Felker <dalias@libc.org>
---
 arch/sh/include/uapi/asm/unistd_32.h | 16 +++++++++++++++-
 arch/sh/include/uapi/asm/unistd_64.h | 16 +++++++++++++++-
 arch/sh/kernel/syscalls_32.S         | 14 ++++++++++++++
 arch/sh/kernel/syscalls_64.S         | 14 ++++++++++++++
 4 files changed, 58 insertions(+), 2 deletions(-)

(limited to 'arch/sh/include')

diff --git a/arch/sh/include/uapi/asm/unistd_32.h b/arch/sh/include/uapi/asm/unistd_32.h
index d13a1d623736..c801bde9e6f5 100644
--- a/arch/sh/include/uapi/asm/unistd_32.h
+++ b/arch/sh/include/uapi/asm/unistd_32.h
@@ -380,7 +380,21 @@
 #define __NR_process_vm_writev	366
 #define __NR_kcmp		367
 #define __NR_finit_module	368
+#define __NR_sched_getattr	369
+#define __NR_sched_setattr	370
+#define __NR_renameat2		371
+#define __NR_seccomp		372
+#define __NR_getrandom		373
+#define __NR_memfd_create	374
+#define __NR_bpf		375
+#define __NR_execveat		376
+#define __NR_userfaultfd	377
+#define __NR_membarrier		378
+#define __NR_mlock2		379
+#define __NR_copy_file_range	380
+#define __NR_preadv2		381
+#define __NR_pwritev2		382
 
-#define NR_syscalls 369
+#define NR_syscalls 383
 
 #endif /* __ASM_SH_UNISTD_32_H */
diff --git a/arch/sh/include/uapi/asm/unistd_64.h b/arch/sh/include/uapi/asm/unistd_64.h
index 47ebd5b5ed55..ce0cb3598b62 100644
--- a/arch/sh/include/uapi/asm/unistd_64.h
+++ b/arch/sh/include/uapi/asm/unistd_64.h
@@ -400,7 +400,21 @@
 #define __NR_process_vm_writev	377
 #define __NR_kcmp		378
 #define __NR_finit_module	379
+#define __NR_sched_getattr	380
+#define __NR_sched_setattr	381
+#define __NR_renameat2		382
+#define __NR_seccomp		383
+#define __NR_getrandom		384
+#define __NR_memfd_create	385
+#define __NR_bpf		386
+#define __NR_execveat		387
+#define __NR_userfaultfd	388
+#define __NR_membarrier		389
+#define __NR_mlock2		390
+#define __NR_copy_file_range	391
+#define __NR_preadv2		392
+#define __NR_pwritev2		393
 
-#define NR_syscalls 380
+#define NR_syscalls 394
 
 #endif /* __ASM_SH_UNISTD_64_H */
diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S
index 734234be2f01..254bc22ee57d 100644
--- a/arch/sh/kernel/syscalls_32.S
+++ b/arch/sh/kernel/syscalls_32.S
@@ -386,3 +386,17 @@ ENTRY(sys_call_table)
 	.long sys_process_vm_writev
 	.long sys_kcmp
 	.long sys_finit_module
+	.long sys_sched_getattr
+	.long sys_sched_setattr		/* 370 */
+	.long sys_renameat2
+	.long sys_seccomp
+	.long sys_getrandom
+	.long sys_memfd_create
+	.long sys_bpf			/* 375 */
+	.long sys_execveat
+	.long sys_userfaultfd
+	.long sys_membarrier
+	.long sys_mlock2
+	.long sys_copy_file_range	/* 380 */
+	.long sys_preadv2
+	.long sys_pwritev2
diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S
index 579fcb9a896b..d6a27f7a4c54 100644
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S
@@ -406,3 +406,17 @@ sys_call_table:
 	.long sys_process_vm_writev
 	.long sys_kcmp
 	.long sys_finit_module
+	.long sys_sched_getattr		/* 380 */
+	.long sys_sched_setattr
+	.long sys_renameat2
+	.long sys_seccomp
+	.long sys_getrandom
+	.long sys_memfd_create		/* 385 */
+	.long sys_bpf
+	.long sys_execveat
+	.long sys_userfaultfd
+	.long sys_membarrier
+	.long sys_mlock2		/* 390 */
+	.long sys_copy_file_range
+	.long sys_preadv2
+	.long sys_pwritev2
-- 
cgit v1.2.3