From 0366a1c70b89efed4f9d590216bb004a16effbed Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 23 Sep 2013 14:29:11 +1000
Subject: powerpc/irq: Run softirqs off the top of the irq stack

Nowadays, irq_exit() calls __do_softirq() pretty much directly
instead of calling do_softirq() which switches to the dedicated
softirq stack.

This has led to observed stack overflows on powerpc since we call
irq_enter() and irq_exit() outside of the scope that switches to
the irq stack.

This fixes it by moving the stack switching up a level, making
irq_enter() and irq_exit() run off the irq stack.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/irq.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 0e40843a1c6e..41f13cec8a8f 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -69,9 +69,9 @@ extern struct thread_info *softirq_ctx[NR_CPUS];
 
 extern void irq_ctx_init(void);
 extern void call_do_softirq(struct thread_info *tp);
-extern int call_handle_irq(int irq, void *p1,
-			   struct thread_info *tp, void *func);
+extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp);
 extern void do_IRQ(struct pt_regs *regs);
+extern void __do_irq(struct pt_regs *regs);
 
 int irq_choose_cpu(const struct cpumask *mask);
 
-- 
cgit v1.2.3


From cbc9565ee82694dec31d8137dec975b83175183b Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 24 Sep 2013 15:17:21 +1000
Subject: powerpc: Remove ksp_limit on ppc64

We've been keeping that field in thread_struct for a while, it contains
the "limit" of the current stack pointer and is meant to be used for
detecting stack overflows.

It has a few problems however:

 - First, it was never actually *used* on 64-bit. Set and updated but
not actually exploited

 - When switching stack to/from irq and softirq stacks, its update
is racy unless we hard disable interrupts, which is costly. This
is fine on 32-bit as we don't soft-disable there but not on 64-bit.

Thus rather than fixing 2 in order to implement 1 in some hypothetical
future, let's remove the code completely from 64-bit. In order to avoid
a clutter of ifdef's, we remove the updates from C code completely
during interrupt stack switching, and instead maintain it from the
asm helper that is used to do the stack switching in the first place.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/processor.h |  4 +---
 arch/powerpc/kernel/asm-offsets.c    |  3 ++-
 arch/powerpc/kernel/irq.c            | 12 ------------
 arch/powerpc/kernel/misc_32.S        | 16 ++++++++++++++++
 arch/powerpc/kernel/process.c        |  3 ++-
 arch/powerpc/lib/sstep.c             |  3 ++-
 6 files changed, 23 insertions(+), 18 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index e378cccfca55..ce4de5aed7b5 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -149,8 +149,6 @@ typedef struct {
 
 struct thread_struct {
 	unsigned long	ksp;		/* Kernel stack pointer */
-	unsigned long	ksp_limit;	/* if ksp <= ksp_limit stack overflow */
-
 #ifdef CONFIG_PPC64
 	unsigned long	ksp_vsid;
 #endif
@@ -162,6 +160,7 @@ struct thread_struct {
 #endif
 #ifdef CONFIG_PPC32
 	void		*pgdir;		/* root of page-table tree */
+	unsigned long	ksp_limit;	/* if ksp <= ksp_limit stack overflow */
 #endif
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 	/*
@@ -321,7 +320,6 @@ struct thread_struct {
 #else
 #define INIT_THREAD  { \
 	.ksp = INIT_SP, \
-	.ksp_limit = INIT_SP_LIMIT, \
 	.regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
 	.fs = KERNEL_DS, \
 	.fpr = {{0}}, \
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index d8958be5f31a..502c7a4e73f7 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -80,10 +80,11 @@ int main(void)
 	DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr));
 #else
 	DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
+	DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
+	DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
 #endif /* CONFIG_PPC64 */
 
 	DEFINE(KSP, offsetof(struct thread_struct, ksp));
-	DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
 	DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
 #ifdef CONFIG_BOOKE
 	DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0]));
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 2234a1276a77..57d286a78f86 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -496,7 +496,6 @@ void do_IRQ(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	struct thread_info *curtp, *irqtp;
-	unsigned long saved_sp_limit;
 
 	/* Switch to the irq stack to handle this */
 	curtp = current_thread_info();
@@ -509,12 +508,6 @@ void do_IRQ(struct pt_regs *regs)
 		return;
 	}
 
-	/* Adjust the stack limit */
-	saved_sp_limit = current->thread.ksp_limit;
-	current->thread.ksp_limit = (unsigned long)irqtp +
-		_ALIGN_UP(sizeof(struct thread_info), 16);
-
-
 	/* Prepare the thread_info in the irq stack */
 	irqtp->task = curtp->task;
 	irqtp->flags = 0;
@@ -526,7 +519,6 @@ void do_IRQ(struct pt_regs *regs)
 	call_do_irq(regs, irqtp);
 
 	/* Restore stack limit */
-	current->thread.ksp_limit = saved_sp_limit;
 	irqtp->task = NULL;
 
 	/* Copy back updates to the thread_info */
@@ -604,16 +596,12 @@ void irq_ctx_init(void)
 static inline void do_softirq_onstack(void)
 {
 	struct thread_info *curtp, *irqtp;
-	unsigned long saved_sp_limit = current->thread.ksp_limit;
 
 	curtp = current_thread_info();
 	irqtp = softirq_ctx[smp_processor_id()];
 	irqtp->task = curtp->task;
 	irqtp->flags = 0;
-	current->thread.ksp_limit = (unsigned long)irqtp +
-				    _ALIGN_UP(sizeof(struct thread_info), 16);
 	call_do_softirq(irqtp);
-	current->thread.ksp_limit = saved_sp_limit;
 	irqtp->task = NULL;
 
 	/* Set any flag that may have been set on the
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 7da3882a3622..2b0ad9845363 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -36,25 +36,41 @@
 
 	.text
 
+/*
+ * The caller's ksp_limit is kept at 8(r1) of the new frame, in the
+ * otherwise unused part of the STACK_FRAME_OVERHEAD, across the call.
+ */
 _GLOBAL(call_do_softirq)
 	mflr	r0
 	stw	r0,4(r1)
+	lwz	r10,THREAD+KSP_LIMIT(r2)	/* r10 = ksp_limit being replaced (r2 presumably is 'current' -- confirm) */
+	addi	r11,r3,THREAD_INFO_GAP	/* r11 = limit for the new stack; taken from r3 before stwu updates it */
 	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
 	mr	r1,r3
+	stw	r10,8(r1)	/* stash the old limit in the new frame */
+	stw	r11,THREAD+KSP_LIMIT(r2)	/* install the limit of the stack we just switched to */
 	bl	__do_softirq
+	lwz	r10,8(r1)	/* reload the stashed limit before leaving the new stack */
 	lwz	r1,0(r1)
 	lwz	r0,4(r1)
+	stw	r10,THREAD+KSP_LIMIT(r2)	/* put the previous limit back */
 	mtlr	r0
 	blr
 
 _GLOBAL(call_do_irq)
 	mflr	r0
 	stw	r0,4(r1)
+	lwz	r10,THREAD+KSP_LIMIT(r2)	/* r10 = ksp_limit being replaced */
+	addi	r11,r4,THREAD_INFO_GAP	/* r11 = limit of the irq stack: tp is the 2nd argument (r4), r3 holds regs */
 	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
 	mr	r1,r4
+	stw	r10,8(r1)	/* stash the old limit in the new frame */
+	stw	r11,THREAD+KSP_LIMIT(r2)	/* install the limit of the stack we just switched to */
 	bl	__do_irq
+	lwz	r10,8(r1)	/* reload the stashed limit before leaving the irq stack */
 	lwz	r1,0(r1)
 	lwz	r0,4(r1)
+	stw	r10,THREAD+KSP_LIMIT(r2)	/* put the previous limit back */
 	mtlr	r0
 	blr
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 6f428da53e20..96d2fdf3aa9e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1000,9 +1000,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	kregs = (struct pt_regs *) sp;
 	sp -= STACK_FRAME_OVERHEAD;
 	p->thread.ksp = sp;
+#ifdef CONFIG_PPC32
 	p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
 				_ALIGN_UP(sizeof(struct thread_info), 16);
-
+#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	p->thread.ptrace_bps[0] = NULL;
 #endif
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index a7ee978fb860..b1faa1593c90 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1505,6 +1505,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		 */
 		if ((ra == 1) && !(regs->msr & MSR_PR) \
 			&& (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) {
+#ifdef CONFIG_PPC32
 			/*
 			 * Check if we will touch kernel sack overflow
 			 */
@@ -1513,7 +1514,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 				err = -EINVAL;
 				break;
 			}
-
+#endif /* CONFIG_PPC32 */
 			/*
 			 * Check if we already set since that means we'll
 			 * lose the previous value.
-- 
cgit v1.2.3


From a787870924dbd6f321661e06d4ec1c7a408c9ccf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 14 Aug 2013 14:55:40 +0200
Subject: sched, arch: Create asm/preempt.h

In order to prepare for per-arch implementations of preempt_count, move
the required bits into an asm-generic header and use this for all
archs.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-h5j0c1r3e3fk015m30h8f1zx@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/Kbuild      |  1 +
 arch/arc/include/asm/Kbuild        |  1 +
 arch/arm/include/asm/Kbuild        |  1 +
 arch/arm64/include/asm/Kbuild      |  1 +
 arch/avr32/include/asm/Kbuild      |  1 +
 arch/blackfin/include/asm/Kbuild   |  1 +
 arch/c6x/include/asm/Kbuild        |  1 +
 arch/cris/include/asm/Kbuild       |  1 +
 arch/frv/include/asm/Kbuild        |  1 +
 arch/h8300/include/asm/Kbuild      |  1 +
 arch/hexagon/include/asm/Kbuild    |  1 +
 arch/ia64/include/asm/Kbuild       |  1 +
 arch/m32r/include/asm/Kbuild       |  1 +
 arch/m68k/include/asm/Kbuild       |  1 +
 arch/metag/include/asm/Kbuild      |  1 +
 arch/microblaze/include/asm/Kbuild |  1 +
 arch/mips/include/asm/Kbuild       |  1 +
 arch/mn10300/include/asm/Kbuild    |  1 +
 arch/openrisc/include/asm/Kbuild   |  1 +
 arch/parisc/include/asm/Kbuild     |  1 +
 arch/powerpc/include/asm/Kbuild    |  1 +
 arch/s390/include/asm/Kbuild       |  1 +
 arch/score/include/asm/Kbuild      |  1 +
 arch/sh/include/asm/Kbuild         |  1 +
 arch/sparc/include/asm/Kbuild      |  1 +
 arch/tile/include/asm/Kbuild       |  1 +
 arch/um/include/asm/Kbuild         |  1 +
 arch/unicore32/include/asm/Kbuild  |  1 +
 arch/x86/include/asm/Kbuild        |  1 +
 arch/xtensa/include/asm/Kbuild     |  1 +
 include/asm-generic/preempt.h      | 54 ++++++++++++++++++++++++++++++++++++++
 include/linux/preempt.h            | 49 +---------------------------------
 32 files changed, 85 insertions(+), 48 deletions(-)
 create mode 100644 include/asm-generic/preempt.h

(limited to 'arch/powerpc/include')

diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index a6e85f448c1c..f01fb505ad52 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h
 
 generic-y += exec.h
 generic-y += trace_clock.h
+generic-y += preempt.h
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index d8dd660898b9..5943f7f9d325 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -46,3 +46,4 @@ generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index d3db39860b9c..4e6838d4ddf6 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -33,3 +33,4 @@ generic-y += timex.h
 generic-y += trace_clock.h
 generic-y += types.h
 generic-y += unaligned.h
+generic-y += preempt.h
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 79a642d199f2..519f89f5b6a3 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -50,3 +50,4 @@ generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild
index d22af851f3f6..b946080ee8bb 100644
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y	+= clkdev.h
 generic-y	+= exec.h
 generic-y	+= trace_clock.h
 generic-y	+= param.h
+generic-y += preempt.h
diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild
index 127826f8a375..f2b43474b0e2 100644
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -44,3 +44,4 @@ generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index e49f918531ad..fc0b3c356027 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -56,3 +56,4 @@ generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index c8325455520e..b06caf649a95 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -11,3 +11,4 @@ generic-y += module.h
 generic-y += trace_clock.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index c5d767028306..74742dc6a3da 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -2,3 +2,4 @@
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += trace_clock.h
+generic-y += preempt.h
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 8ada3cf0c98d..7e0e7213a481 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -6,3 +6,4 @@ generic-y += mmu.h
 generic-y += module.h
 generic-y += trace_clock.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 1da17caac23c..67c3450309b7 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -53,3 +53,4 @@ generic-y += types.h
 generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index a3456f34f672..f93ee087e8fe 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -3,4 +3,5 @@ generic-y += clkdev.h
 generic-y += exec.h
 generic-y += kvm_para.h
 generic-y += trace_clock.h
+generic-y += preempt.h
 generic-y += vtime.h
\ No newline at end of file
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index bebdc36ebb0a..2b58c5f0bc38 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h
 generic-y += exec.h
 generic-y += module.h
 generic-y += trace_clock.h
+generic-y += preempt.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 09d77a862da3..a5d27f272a59 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -31,3 +31,4 @@ generic-y += trace_clock.h
 generic-y += types.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild
index 6ae0ccb632cb..84d0c1d6b9b3 100644
--- a/arch/metag/include/asm/Kbuild
+++ b/arch/metag/include/asm/Kbuild
@@ -52,3 +52,4 @@ generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index d3c51a6a601d..ce0bbf8f5640 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h
 generic-y += exec.h
 generic-y += trace_clock.h
 generic-y += syscalls.h
+generic-y += preempt.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 454ddf9bb76f..1acbb8b77a71 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -11,5 +11,6 @@ generic-y += sections.h
 generic-y += segment.h
 generic-y += serial.h
 generic-y += trace_clock.h
+generic-y += preempt.h
 generic-y += ucontext.h
 generic-y += xor.h
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index c5d767028306..74742dc6a3da 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -2,3 +2,4 @@
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += trace_clock.h
+generic-y += preempt.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 195653e851da..78405625e799 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -67,3 +67,4 @@ generic-y += ucontext.h
 generic-y += user.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index ff4c9faed546..a603b9ebe54c 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -4,3 +4,4 @@ generic-y += word-at-a-time.h auxvec.h user.h cputime.h emergency-restart.h \
 	  div64.h irq_regs.h kdebug.h kvm_para.h local64.h local.h param.h \
 	  poll.h xor.h clkdev.h exec.h
 generic-y += trace_clock.h
+generic-y += preempt.h
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 704e6f10ae80..d8f9d2f18a23 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -2,4 +2,5 @@
 generic-y += clkdev.h
 generic-y += rwsem.h
 generic-y += trace_clock.h
+generic-y += preempt.h
 generic-y += vtime.h
\ No newline at end of file
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index f313f9cbcf44..7a5288f3479a 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -2,3 +2,4 @@
 
 generic-y += clkdev.h
 generic-y += trace_clock.h
+generic-y += preempt.h
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index e1c7bb999b06..f3414ade77a3 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -4,3 +4,4 @@ header-y +=
 generic-y += clkdev.h
 generic-y += trace_clock.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 280bea9e5e2b..231efbb68108 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -34,3 +34,4 @@ generic-y += termios.h
 generic-y += trace_clock.h
 generic-y += ucontext.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 7e4a97fbded4..bf390667657a 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -16,3 +16,4 @@ generic-y += serial.h
 generic-y += trace_clock.h
 generic-y += types.h
 generic-y += word-at-a-time.h
+generic-y += preempt.h
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 664d6ad23f80..22f3bd147fa7 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -38,3 +38,4 @@ generic-y += termios.h
 generic-y += trace_clock.h
 generic-y += types.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index b30f34a79882..fdde187e6087 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h
 generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h
 generic-y += switch_to.h clkdev.h
 generic-y += trace_clock.h
+generic-y += preempt.h
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 89d8b6c4e39a..00045cbe5c63 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -60,3 +60,4 @@ generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 7f669853317a..eca20286a91c 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -5,3 +5,4 @@ genhdr-y += unistd_64.h
 genhdr-y += unistd_x32.h
 
 generic-y += clkdev.h
+generic-y += preempt.h
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 1b982641ec35..228d6aee3a16 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -28,3 +28,4 @@ generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += xor.h
+generic-y += preempt.h
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
new file mode 100644
index 000000000000..a1fc6590a743
--- /dev/null
+++ b/include/asm-generic/preempt.h
@@ -0,0 +1,54 @@
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include <linux/thread_info.h>
+
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
+static __always_inline int preempt_count(void)
+{
+	return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline int *preempt_count_ptr(void)
+{
+	return &current_thread_info()->preempt_count;
+}
+
+/*
+ * We now lose PREEMPT_NEED_RESCHED and cause an extra reschedule; however the
+ * alternative is losing a reschedule. Better schedule too often -- also this
+ * should be a very rare operation.
+ */
+static __always_inline void preempt_count_set(int pc)
+{
+	*preempt_count_ptr() = pc;
+}
+
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * single instruction.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
+static __always_inline void set_preempt_need_resched(void)
+{
+	*preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline void clear_preempt_need_resched(void)
+{
+	*preempt_count_ptr() |= PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline bool test_preempt_need_resched(void)
+{
+	return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED);
+}
+
+#endif /* __ASM_PREEMPT_H */
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 92e341853e4b..df8e245e8729 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -6,7 +6,6 @@
  * preempt_count (used for kernel preemption, interrupt count, etc.)
  */
 
-#include <linux/thread_info.h>
 #include <linux/linkage.h>
 #include <linux/list.h>
 
@@ -16,53 +15,7 @@
  */
 #define PREEMPT_NEED_RESCHED	0x80000000
 
-/*
- * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
- * that think a non-zero value indicates we cannot preempt.
- */
-static __always_inline int preempt_count(void)
-{
-	return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED;
-}
-
-static __always_inline int *preempt_count_ptr(void)
-{
-	return &current_thread_info()->preempt_count;
-}
-
-/*
- * We now loose PREEMPT_NEED_RESCHED and cause an extra reschedule; however the
- * alternative is loosing a reschedule. Better schedule too often -- also this
- * should be a very rare operation.
- */
-static __always_inline void preempt_count_set(int pc)
-{
-	*preempt_count_ptr() = pc;
-}
-
-/*
- * We fold the NEED_RESCHED bit into the preempt count such that
- * preempt_enable() can decrement and test for needing to reschedule with a
- * single instruction.
- *
- * We invert the actual bit, so that when the decrement hits 0 we know we both
- * need to resched (the bit is cleared) and can resched (no preempt count).
- */
-
-static __always_inline void set_preempt_need_resched(void)
-{
-	*preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED;
-}
-
-static __always_inline void clear_preempt_need_resched(void)
-{
-	*preempt_count_ptr() |= PREEMPT_NEED_RESCHED;
-}
-
-static __always_inline bool test_preempt_need_resched(void)
-{
-	return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED);
-}
+#include <asm/preempt.h>
 
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
   extern void add_preempt_count(int val);
-- 
cgit v1.2.3


From 62748f32d501f5d3712a7c372bbb92abc7c62bc7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 24 Sep 2013 08:20:52 -0700
Subject: net: introduce SO_MAX_PACING_RATE

As mentioned in commit afe4fd062416b ("pkt_sched: fq: Fair Queue packet
scheduler"), this patch adds a new socket option.

SO_MAX_PACING_RATE offers the application the ability to cap the
rate computed by transport layer. Value is in bytes per second.

u32 val = 1000000;
setsockopt(sockfd, SOL_SOCKET, SO_MAX_PACING_RATE, &val, sizeof(val));

To be effectively paced, a flow must use FQ packet scheduler.

Note that a packet scheduler takes into account the headers for its
computations. The effective payload rate depends on MSS and retransmits
if any.

I chose to make this pacing rate a SOL_SOCKET option instead of a
TCP one because this can be used by other protocols.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Steinar H. Gunderson <sesse@google.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/alpha/include/uapi/asm/socket.h   |  4 +++-
 arch/avr32/include/uapi/asm/socket.h   |  2 ++
 arch/cris/include/uapi/asm/socket.h    |  2 ++
 arch/frv/include/uapi/asm/socket.h     |  2 ++
 arch/h8300/include/uapi/asm/socket.h   |  2 ++
 arch/ia64/include/uapi/asm/socket.h    |  2 ++
 arch/m32r/include/uapi/asm/socket.h    |  2 ++
 arch/mips/include/uapi/asm/socket.h    |  2 ++
 arch/mn10300/include/uapi/asm/socket.h |  2 ++
 arch/parisc/include/uapi/asm/socket.h  |  2 ++
 arch/powerpc/include/uapi/asm/socket.h |  2 ++
 arch/s390/include/uapi/asm/socket.h    |  2 ++
 arch/sparc/include/uapi/asm/socket.h   |  2 ++
 arch/xtensa/include/uapi/asm/socket.h  |  2 ++
 include/net/sock.h                     |  1 +
 include/uapi/asm-generic/socket.h      |  2 ++
 net/core/sock.c                        | 12 ++++++++++++
 net/ipv4/tcp_input.c                   |  2 +-
 18 files changed, 45 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 467de010ea7e..e3a1491d5073 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -81,6 +81,8 @@
 
 #define SO_SELECT_ERR_QUEUE	45
 
-#define SO_BUSY_POLL			46
+#define SO_BUSY_POLL		46
+
+#define SO_MAX_PACING_RATE	47
 
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index 11c4259c62fb..439936421434 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -76,4 +76,6 @@
 
 #define SO_BUSY_POLL		46
 
+#define SO_MAX_PACING_RATE	47
+
 #endif /* __ASM_AVR32_SOCKET_H */
diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h
index eb723e51554e..13829aaaeec5 100644
--- a/arch/cris/include/uapi/asm/socket.h
+++ b/arch/cris/include/uapi/asm/socket.h
@@ -78,6 +78,8 @@
 
 #define SO_BUSY_POLL		46
 
+#define SO_MAX_PACING_RATE	47
+
 #endif /* _ASM_SOCKET_H */
 
 
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index f0cb1c341163..5d4299762426 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -76,5 +76,7 @@
 
 #define SO_BUSY_POLL		46
 
+#define SO_MAX_PACING_RATE	47
+
 #endif /* _ASM_SOCKET_H */
 
diff --git a/arch/h8300/include/uapi/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h
index 9490758c5e2b..214ccaf3554a 100644
--- a/arch/h8300/include/uapi/asm/socket.h
+++ b/arch/h8300/include/uapi/asm/socket.h
@@ -76,4 +76,6 @@
 
 #define SO_BUSY_POLL		46
 
+#define SO_MAX_PACING_RATE	47
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 556d0701a155..c25302fb48d9 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -85,4 +85,6 @@
 
 #define SO_BUSY_POLL		46
 
+#define SO_MAX_PACING_RATE	47
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index 24be7c8da86a..52966650114f 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -76,4 +76,6 @@
 
 #define SO_BUSY_POLL		46
 
+#define SO_MAX_PACING_RATE	47
+
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 61c01f054d1b..0df9787cd84d 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -94,4 +94,6 @@
 
 #define SO_BUSY_POLL		46
 
+#define SO_MAX_PACING_RATE	47
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index e2a2b203eb00..71dedcae55a6 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -76,4 +76,6 @@
 
 #define SO_BUSY_POLL		46
 
+#define SO_MAX_PACING_RATE	47
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 71700e636a8e..7c614d01f1fa 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -75,6 +75,8 @@
 
 #define SO_BUSY_POLL		0x4027
 
+#define SO_MAX_PACING_RATE	0x4028	/* follows SO_BUSY_POLL (0x4027); 0x4048 was a typo */
+
 /* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
  * have to define SOCK_NONBLOCK to a different value here.
  */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index a6d74467c9ed..fa698324a1fd 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -83,4 +83,6 @@
 
 #define SO_BUSY_POLL		46
 
+#define SO_MAX_PACING_RATE	47
+
 #endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 92494494692e..c286c2e868f0 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -82,4 +82,6 @@
 
 #define SO_BUSY_POLL		46
 
+#define SO_MAX_PACING_RATE	47
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 4e1d66c3ce71..0f21e9a5ca18 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -72,6 +72,8 @@
 
 #define SO_BUSY_POLL		0x0030
 
+#define SO_MAX_PACING_RATE	0x0031
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index c114483010c1..7db5c22faa68 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -87,4 +87,6 @@
 
 #define SO_BUSY_POLL		46
 
+#define SO_MAX_PACING_RATE	47
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 4625d2eff461..240aa3f08cd6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -363,6 +363,7 @@ struct sock {
 	int			sk_wmem_queued;
 	gfp_t			sk_allocation;
 	u32			sk_pacing_rate; /* bytes per second */
+	u32			sk_max_pacing_rate;
 	netdev_features_t	sk_route_caps;
 	netdev_features_t	sk_route_nocaps;
 	int			sk_gso_type;
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index f04b69b6abf2..38f14d0264c3 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -78,4 +78,6 @@
 
 #define SO_BUSY_POLL		46
 
+#define SO_MAX_PACING_RATE	47
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 5b6beba494a3..2bd9b3faa0d0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -914,6 +914,13 @@ set_rcvbuf:
 		}
 		break;
 #endif
+
+	case SO_MAX_PACING_RATE:
+		sk->sk_max_pacing_rate = val;
+		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
+					 sk->sk_max_pacing_rate);
+		break;
+
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -1177,6 +1184,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		break;
 #endif
 
+	case SO_MAX_PACING_RATE:
+		v.val = sk->sk_max_pacing_rate;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -2319,6 +2330,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_ll_usec		=	sysctl_net_busy_read;
 #endif
 
+	sk->sk_max_pacing_rate = ~0U;
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
 	 * (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5d083855c111..66aa816ad30b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -735,7 +735,7 @@ static void tcp_update_pacing_rate(struct sock *sk)
 	if (tp->srtt > 8 + 2)
 		do_div(rate, tp->srtt);
 
-	sk->sk_pacing_rate = min_t(u64, rate, ~0U);
+	sk->sk_pacing_rate = min_t(u64, rate, sk->sk_max_pacing_rate);
 }
 
 /* Calculate rto without backoff.  This is the second half of Van Jacobson's
-- 
cgit v1.2.3


From 25ff79443cbfa924b8df1d4a8a0fbff83816938a Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Sat, 7 Sep 2013 14:07:11 -0500
Subject: of: implement pci_address_to_pio as weak function

Implement pci_address_to_pio as a weak function to remove the dependency on
asm/prom.h. This is in preparation to make prom.h optional.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: Grant Likely <grant.likely@linaro.org>
---
 arch/microblaze/include/asm/prom.h |  9 ---------
 arch/mips/include/asm/prom.h       | 11 -----------
 arch/powerpc/include/asm/prom.h    |  5 -----
 arch/x86/include/asm/prom.h        |  3 ---
 arch/x86/kernel/devicetree.c       | 10 ----------
 drivers/of/address.c               |  8 ++++++++
 drivers/of/of_pci.c                |  1 -
 include/linux/of_address.h         |  5 +----
 8 files changed, 9 insertions(+), 43 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index f05bedce70d0..0ebd924902df 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -26,13 +26,4 @@ enum early_consoles {
 
 extern int of_early_console(void *version);
 
-/*
- * OF address retreival & translation
- */
-
-#ifdef CONFIG_PCI
-extern unsigned long pci_address_to_pio(phys_addr_t address);
-#define pci_address_to_pio pci_address_to_pio
-#endif	/* CONFIG_PCI */
-
 #endif /* _ASM_MICROBLAZE_PROM_H */
diff --git a/arch/mips/include/asm/prom.h b/arch/mips/include/asm/prom.h
index e3dbd0e0608e..ccd2b75f152c 100644
--- a/arch/mips/include/asm/prom.h
+++ b/arch/mips/include/asm/prom.h
@@ -19,17 +19,6 @@
 
 extern void device_tree_init(void);
 
-static inline unsigned long pci_address_to_pio(phys_addr_t address)
-{
-	/*
-	 * The ioport address can be directly used by inX() / outX()
-	 */
-	BUG_ON(address > IO_SPACE_LIMIT);
-
-	return (unsigned long) address;
-}
-#define pci_address_to_pio pci_address_to_pio
-
 struct boot_param_header;
 
 extern void __dt_setup_arch(struct boot_param_header *bph);
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 7d0c7f3a7171..bd215f74df0f 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -30,11 +30,6 @@
 extern u64 of_translate_dma_address(struct device_node *dev,
 				    const __be32 *in_addr);
 
-#ifdef CONFIG_PCI
-extern unsigned long pci_address_to_pio(phys_addr_t address);
-#define pci_address_to_pio pci_address_to_pio
-#endif	/* CONFIG_PCI */
-
 /* Parse the ibm,dma-window property of an OF node into the busno, phys and
  * size parameters.
  */
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index bade6ac3b14f..8ef2ec70858f 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -39,9 +39,6 @@ static inline void x86_dtb_init(void) { }
 
 extern char cmd_line[COMMAND_LINE_SIZE];
 
-#define pci_address_to_pio pci_address_to_pio
-unsigned long pci_address_to_pio(phys_addr_t addr);
-
 #define HAVE_ARCH_DEVTREE_FIXUPS
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 679d676a30d7..cffd07368547 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -27,16 +27,6 @@ char __initdata cmd_line[COMMAND_LINE_SIZE];
 
 int __initdata of_ioapic;
 
-unsigned long pci_address_to_pio(phys_addr_t address)
-{
-	/*
-	 * The ioport address can be directly used by inX / outX
-	 */
-	BUG_ON(address >= (1 << 16));
-	return (unsigned long)address;
-}
-EXPORT_SYMBOL_GPL(pci_address_to_pio);
-
 void __init early_init_dt_scan_chosen_arch(unsigned long node)
 {
 	BUG();
diff --git a/drivers/of/address.c b/drivers/of/address.c
index b55c21890760..556a7fb6ead3 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -626,6 +626,14 @@ const __be32 *of_get_address(struct device_node *dev, int index, u64 *size,
 }
 EXPORT_SYMBOL(of_get_address);
 
+unsigned long __weak pci_address_to_pio(phys_addr_t address)
+{
+	if (address > IO_SPACE_LIMIT)
+		return (unsigned long)-1;
+
+	return (unsigned long) address;
+}
+
 static int __of_address_to_resource(struct device_node *dev,
 		const __be32 *addrp, u64 size, unsigned int flags,
 		const char *name, struct resource *r)
diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
index e5ca00893c0c..848199633798 100644
--- a/drivers/of/of_pci.c
+++ b/drivers/of/of_pci.c
@@ -2,7 +2,6 @@
 #include <linux/export.h>
 #include <linux/of.h>
 #include <linux/of_pci.h>
-#include <asm/prom.h>
 
 static inline int __of_pci_pci_compare(struct device_node *node,
 				       unsigned int data)
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index 4c2e6f26432c..f6fc6899ceae 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -52,10 +52,7 @@ extern void __iomem *of_iomap(struct device_node *device, int index);
 extern const __be32 *of_get_address(struct device_node *dev, int index,
 			   u64 *size, unsigned int *flags);
 
-#ifndef pci_address_to_pio
-static inline unsigned long pci_address_to_pio(phys_addr_t addr) { return -1; }
-#define pci_address_to_pio pci_address_to_pio
-#endif
+extern unsigned long pci_address_to_pio(phys_addr_t addr);
 
 extern int of_pci_range_parser_init(struct of_pci_range_parser *parser,
 			struct device_node *node);
-- 
cgit v1.2.3


From 0c3f061c195ceb891067b6de9e4ecc347c4dea31 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Tue, 17 Sep 2013 10:42:50 -0500
Subject: of: implement of_node_to_nid as a weak function

Implement of_node_to_nid as a weak function to remove the dependency on
asm/prom.h. This is in preparation to make prom.h optional.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 arch/powerpc/include/asm/prom.h |  7 -------
 arch/sparc/include/asm/prom.h   |  4 ----
 drivers/of/base.c               |  7 +++++++
 include/linux/of.h              | 11 ++++-------
 4 files changed, 11 insertions(+), 18 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index bd215f74df0f..6707c16d8fc5 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -42,13 +42,6 @@ extern void kdump_move_device_tree(void);
 /* cache lookup */
 struct device_node *of_find_next_cache_node(struct device_node *np);
 
-#ifdef CONFIG_NUMA
-extern int of_node_to_nid(struct device_node *device);
-#else
-static inline int of_node_to_nid(struct device_node *device) { return 0; }
-#endif
-#define of_node_to_nid of_node_to_nid
-
 extern void of_instantiate_rtc(void);
 
 extern int of_get_ibm_chip_id(struct device_node *np);
diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h
index 67c62578d170..60c8d7bd4058 100644
--- a/arch/sparc/include/asm/prom.h
+++ b/arch/sparc/include/asm/prom.h
@@ -43,10 +43,6 @@ extern int of_getintprop_default(struct device_node *np,
 				 const char *name,
 				 int def);
 extern int of_find_in_proplist(const char *list, const char *match, int len);
-#ifdef CONFIG_NUMA
-extern int of_node_to_nid(struct device_node *dp);
-#define of_node_to_nid of_node_to_nid
-#endif
 
 extern void prom_build_devicetree(void);
 extern void of_populate_present_mask(void);
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 865d3f66c86b..ced4c06d79b3 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -74,6 +74,13 @@ int of_n_size_cells(struct device_node *np)
 }
 EXPORT_SYMBOL(of_n_size_cells);
 
+#ifdef CONFIG_NUMA
+int __weak of_node_to_nid(struct device_node *np)
+{
+	return numa_node_id();
+}
+#endif
+
 #if defined(CONFIG_OF_DYNAMIC)
 /**
  *	of_node_get - Increment refcount of a node
diff --git a/include/linux/of.h b/include/linux/of.h
index f95aee391e30..4d294a0b8a57 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -534,13 +534,10 @@ static inline const char *of_prop_next_string(struct property *prop,
 #define of_match_node(_matches, _node)	NULL
 #endif /* CONFIG_OF */
 
-#ifndef of_node_to_nid
-static inline int of_node_to_nid(struct device_node *np)
-{
-	return numa_node_id();
-}
-
-#define of_node_to_nid of_node_to_nid
+#if defined(CONFIG_OF) && defined(CONFIG_NUMA)
+extern int of_node_to_nid(struct device_node *np);
+#else
+static inline int of_node_to_nid(struct device_node *device) { return 0; }
 #endif
 
 /**
-- 
cgit v1.2.3


From 32df8dca503f82c816f8be85a8d0a394a8b88c2c Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Sat, 7 Sep 2013 14:11:58 -0500
Subject: of: remove HAVE_ARCH_DEVTREE_FIXUPS

HAVE_ARCH_DEVTREE_FIXUPS appears to always be needed except for sparc,
but it is only used for /proc/device-tree and sparc does not enable
/proc/device-tree. So this option is redundant. Remove the option and
always enable it. This has the side effect of fixing /proc/device-tree
on arches such as arm64 which failed to define this option.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Vineet Gupta <vgupta@synopsys.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: Chris Zankel <chris@zankel.net>
Cc: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/arc/include/asm/prom.h        | 2 --
 arch/arm/include/asm/prom.h        | 2 --
 arch/metag/include/asm/prom.h      | 1 -
 arch/microblaze/include/asm/prom.h | 2 --
 arch/openrisc/include/asm/prom.h   | 2 --
 arch/powerpc/include/asm/prom.h    | 2 --
 arch/x86/include/asm/prom.h        | 2 --
 arch/xtensa/include/asm/prom.h     | 2 --
 fs/proc/proc_devtree.c             | 3 ---
 9 files changed, 18 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/arc/include/asm/prom.h b/arch/arc/include/asm/prom.h
index 692d0d0789a7..15555499e430 100644
--- a/arch/arc/include/asm/prom.h
+++ b/arch/arc/include/asm/prom.h
@@ -9,6 +9,4 @@
 #ifndef _ASM_ARC_PROM_H_
 #define _ASM_ARC_PROM_H_
 
-#define HAVE_ARCH_DEVTREE_FIXUPS
-
 #endif
diff --git a/arch/arm/include/asm/prom.h b/arch/arm/include/asm/prom.h
index 4a2985e21969..b681575ad3de 100644
--- a/arch/arm/include/asm/prom.h
+++ b/arch/arm/include/asm/prom.h
@@ -11,8 +11,6 @@
 #ifndef __ASMARM_PROM_H
 #define __ASMARM_PROM_H
 
-#define HAVE_ARCH_DEVTREE_FIXUPS
-
 #ifdef CONFIG_OF
 
 extern const struct machine_desc *setup_machine_fdt(unsigned int dt_phys);
diff --git a/arch/metag/include/asm/prom.h b/arch/metag/include/asm/prom.h
index e19022c5a0cb..d4be144a8613 100644
--- a/arch/metag/include/asm/prom.h
+++ b/arch/metag/include/asm/prom.h
@@ -14,7 +14,6 @@
 #ifndef __ASM_METAG_PROM_H
 #define __ASM_METAG_PROM_H
 
-#define HAVE_ARCH_DEVTREE_FIXUPS
 
 extern void copy_fdt(void);
 
diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 0ebd924902df..2f03ac815851 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -16,8 +16,6 @@
 
 #include <linux/of.h>
 
-#define HAVE_ARCH_DEVTREE_FIXUPS
-
 /* Other Prototypes */
 enum early_consoles {
 	UARTLITE = 1,
diff --git a/arch/openrisc/include/asm/prom.h b/arch/openrisc/include/asm/prom.h
index 93c9980e1b6b..bec477ba9f7d 100644
--- a/arch/openrisc/include/asm/prom.h
+++ b/arch/openrisc/include/asm/prom.h
@@ -17,6 +17,4 @@
 #ifndef _ASM_OPENRISC_PROM_H
 #define _ASM_OPENRISC_PROM_H
 
-#define HAVE_ARCH_DEVTREE_FIXUPS
-
 #endif /* _ASM_OPENRISC_PROM_H */
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 6707c16d8fc5..43fe0023d722 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -20,8 +20,6 @@
 #include <asm/irq.h>
 #include <linux/atomic.h>
 
-#define HAVE_ARCH_DEVTREE_FIXUPS
-
 /*
  * OF address retreival & translation
  */
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index 8ef2ec70858f..fbeb06ed0eaa 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -39,7 +39,5 @@ static inline void x86_dtb_init(void) { }
 
 extern char cmd_line[COMMAND_LINE_SIZE];
 
-#define HAVE_ARCH_DEVTREE_FIXUPS
-
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/xtensa/include/asm/prom.h b/arch/xtensa/include/asm/prom.h
index f3d7cd2c0de7..2a87a583b41b 100644
--- a/arch/xtensa/include/asm/prom.h
+++ b/arch/xtensa/include/asm/prom.h
@@ -1,6 +1,4 @@
 #ifndef _XTENSA_ASM_PROM_H
 #define _XTENSA_ASM_PROM_H
 
-#define HAVE_ARCH_DEVTREE_FIXUPS
-
 #endif /* _XTENSA_ASM_PROM_H */
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 106a83570630..70779b2fc209 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -14,16 +14,13 @@
 #include <linux/of.h>
 #include <linux/export.h>
 #include <linux/slab.h>
-#include <asm/prom.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
 static inline void set_node_proc_entry(struct device_node *np,
 				       struct proc_dir_entry *de)
 {
-#ifdef HAVE_ARCH_DEVTREE_FIXUPS
 	np->pde = de;
-#endif
 }
 
 static struct proc_dir_entry *proc_device_tree;
-- 
cgit v1.2.3


From d0dfa16a600190d142f7538e5909d13c35b60d98 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Mon, 16 Sep 2013 21:05:05 -0500
Subject: of: move of_translate_dma_address to of_address.h

of_translate_dma_address is implemented in common code, so move the
declaration there too.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
---
 arch/powerpc/include/asm/prom.h | 4 ----
 include/linux/of_address.h      | 4 ++++
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 43fe0023d722..b8774bdc69e0 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -24,10 +24,6 @@
  * OF address retreival & translation
  */
 
-/* Translate a DMA address from device space to CPU space */
-extern u64 of_translate_dma_address(struct device_node *dev,
-				    const __be32 *in_addr);
-
 /* Parse the ibm,dma-window property of an OF node into the busno, phys and
  * size parameters.
  */
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index e8a179773a1a..5f6ed6b182b8 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -34,6 +34,10 @@ static inline void of_pci_range_to_resource(struct of_pci_range *range,
 	res->name = np->full_name;
 }
 
+/* Translate a DMA address from device space to CPU space */
+extern u64 of_translate_dma_address(struct device_node *dev,
+				    const __be32 *in_addr);
+
 #ifdef CONFIG_OF_ADDRESS
 extern u64 of_translate_address(struct device_node *np, const __be32 *addr);
 extern bool of_can_translate_address(struct device_node *dev);
-- 
cgit v1.2.3


From 5c19c5c6d4f5e1dc0d0e26b683bc820dda01fe06 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Tue, 17 Sep 2013 14:34:00 -0500
Subject: powerpc: clean-up include ordering in prom.h

Now that the core OF headers don't depend on prom.h, rearrange the
includes. There are still lots of implicit includes in the powerpc tree,
so the includes of OF headers are still necessary.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 arch/powerpc/include/asm/prom.h | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index b8774bdc69e0..7687f82a3217 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -1,4 +1,3 @@
-#include <linux/of.h>	/* linux/of.h gets to determine #include ordering */
 #ifndef _POWERPC_PROM_H
 #define _POWERPC_PROM_H
 #ifdef __KERNEL__
@@ -20,6 +19,13 @@
 #include <asm/irq.h>
 #include <linux/atomic.h>
 
+/* These includes should be removed once implicit includes are cleaned up. */
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+
 /*
  * OF address retreival & translation
  */
@@ -125,14 +131,5 @@ struct of_drconf_cell {
  */
 extern unsigned char ibm_architecture_vec[];
 
-/* These includes are put at the bottom because they may contain things
- * that are overridden by this file.  Ideally they shouldn't be included
- * by this file, but there are a bunch of .c files that currently depend
- * on it.  Eventually they will be cleaned up. */
-#include <linux/of_fdt.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/platform_device.h>
-
 #endif /* __KERNEL__ */
 #endif /* _POWERPC_PROM_H */
-- 
cgit v1.2.3


From 3f0116c3238a96bc18ad4b4acefe4e7be32fa861 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 10 Oct 2013 10:16:30 +0200
Subject: compiler/gcc4: Add quirk for 'asm goto' miscompilation bug

Fengguang Wu, Oleg Nesterov and Peter Zijlstra tracked down
a kernel crash to a GCC bug: GCC miscompiles certain 'asm goto'
constructs, as outlined here:

  http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670

Implement a workaround suggested by Jakub Jelinek.

Reported-and-tested-by: Fengguang Wu <fengguang.wu@intel.com>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Suggested-by: Jakub Jelinek <jakub@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm/include/asm/jump_label.h     |  2 +-
 arch/mips/include/asm/jump_label.h    |  2 +-
 arch/powerpc/include/asm/jump_label.h |  2 +-
 arch/s390/include/asm/jump_label.h    |  2 +-
 arch/sparc/include/asm/jump_label.h   |  2 +-
 arch/x86/include/asm/cpufeature.h     |  6 +++---
 arch/x86/include/asm/jump_label.h     |  2 +-
 arch/x86/include/asm/mutex_64.h       |  4 ++--
 include/linux/compiler-gcc4.h         | 15 +++++++++++++++
 9 files changed, 26 insertions(+), 11 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index bfc198c75913..863c892b4aaa 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -16,7 +16,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-	asm goto("1:\n\t"
+	asm_volatile_goto("1:\n\t"
 		 JUMP_LABEL_NOP "\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
 		 ".word 1b, %l[l_yes], %c0\n\t"
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index 4d6d77ed9b9d..e194f957ca8c 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -22,7 +22,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-	asm goto("1:\tnop\n\t"
+	asm_volatile_goto("1:\tnop\n\t"
 		"nop\n\t"
 		".pushsection __jump_table,  \"aw\"\n\t"
 		WORD_INSN " 1b, %l[l_yes], %0\n\t"
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index ae098c438f00..f016bb699b5f 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -19,7 +19,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-	asm goto("1:\n\t"
+	asm_volatile_goto("1:\n\t"
 		 "nop\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
 		 JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 6c32190dc73e..346b1c85ffb4 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -15,7 +15,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-	asm goto("0:	brcl 0,0\n"
+	asm_volatile_goto("0:	brcl 0,0\n"
 		".pushsection __jump_table, \"aw\"\n"
 		ASM_ALIGN "\n"
 		ASM_PTR " 0b, %l[label], %0\n"
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index 5080d16a832f..ec2e2e2aba7d 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -9,7 +9,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-		asm goto("1:\n\t"
+		asm_volatile_goto("1:\n\t"
 			 "nop\n\t"
 			 "nop\n\t"
 			 ".pushsection __jump_table,  \"aw\"\n\t"
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index d3f5c63078d8..89270b4318db 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -374,7 +374,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 		 * Catch too early usage of this before alternatives
 		 * have run.
 		 */
-		asm goto("1: jmp %l[t_warn]\n"
+		asm_volatile_goto("1: jmp %l[t_warn]\n"
 			 "2:\n"
 			 ".section .altinstructions,\"a\"\n"
 			 " .long 1b - .\n"
@@ -388,7 +388,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 
 #endif
 
-		asm goto("1: jmp %l[t_no]\n"
+		asm_volatile_goto("1: jmp %l[t_no]\n"
 			 "2:\n"
 			 ".section .altinstructions,\"a\"\n"
 			 " .long 1b - .\n"
@@ -453,7 +453,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
  * have. Thus, we force the jump to the widest, 4-byte, signed relative
  * offset even though the last would often fit in less bytes.
  */
-		asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
+		asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
 			 "2:\n"
 			 ".section .altinstructions,\"a\"\n"
 			 " .long 1b - .\n"		/* src offset */
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 64507f35800c..6a2cefb4395a 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -18,7 +18,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-	asm goto("1:"
+	asm_volatile_goto("1:"
 		".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
 		".pushsection __jump_table,  \"aw\" \n\t"
 		_ASM_ALIGN "\n\t"
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index e7e6751648ed..07537a44216e 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -20,7 +20,7 @@
 static inline void __mutex_fastpath_lock(atomic_t *v,
 					 void (*fail_fn)(atomic_t *))
 {
-	asm volatile goto(LOCK_PREFIX "   decl %0\n"
+	asm_volatile_goto(LOCK_PREFIX "   decl %0\n"
 			  "   jns %l[exit]\n"
 			  : : "m" (v->counter)
 			  : "memory", "cc"
@@ -75,7 +75,7 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count)
 static inline void __mutex_fastpath_unlock(atomic_t *v,
 					   void (*fail_fn)(atomic_t *))
 {
-	asm volatile goto(LOCK_PREFIX "   incl %0\n"
+	asm_volatile_goto(LOCK_PREFIX "   incl %0\n"
 			  "   jg %l[exit]\n"
 			  : : "m" (v->counter)
 			  : "memory", "cc"
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 842de225055f..ded429966c1f 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -65,6 +65,21 @@
 #define __visible __attribute__((externally_visible))
 #endif
 
+/*
+ * GCC 'asm goto' miscompiles certain code sequences:
+ *
+ *   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
+ *
+ * Work it around via a compiler barrier quirk suggested by Jakub Jelinek.
+ * Fixed in GCC 4.8.2 and later versions.
+ *
+ * (asm goto is automatically volatile - the naming reflects this.)
+ */
+#if GCC_VERSION <= 40801
+# define asm_volatile_goto(x...)	do { asm goto(x); asm (""); } while (0)
+#else
+# define asm_volatile_goto(x...)	do { asm goto(x); } while (0)
+#endif
 
 #ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
 #if GCC_VERSION >= 40400
-- 
cgit v1.2.3


From 12f04f2be80dd8d9da24534828f3ab3189ca5af2 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 23 Sep 2013 12:04:36 +1000
Subject: powerpc: Book 3S MMU little endian support

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/mmu-hash64.h |  4 +--
 arch/powerpc/mm/hash_native_64.c      | 46 ++++++++++++++++++++---------------
 arch/powerpc/mm/hash_utils_64.c       | 38 ++++++++++++++---------------
 3 files changed, 46 insertions(+), 42 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index c4cf01197273..807014dde821 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -135,8 +135,8 @@ extern char initial_stab[];
 #ifndef __ASSEMBLY__
 
 struct hash_pte {
-	unsigned long v;
-	unsigned long r;
+	__be64 v;
+	__be64 r;
 };
 
 extern struct hash_pte *htab_address;
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index c33d939120c9..3ea26c25590b 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -35,7 +35,11 @@
 #define DBG_LOW(fmt...)
 #endif
 
+#ifdef __BIG_ENDIAN__
 #define HPTE_LOCK_BIT 3
+#else
+#define HPTE_LOCK_BIT (56+3)
+#endif
 
 DEFINE_RAW_SPINLOCK(native_tlbie_lock);
 
@@ -172,7 +176,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize,
 
 static inline void native_lock_hpte(struct hash_pte *hptep)
 {
-	unsigned long *word = &hptep->v;
+	unsigned long *word = (unsigned long *)&hptep->v;
 
 	while (1) {
 		if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
@@ -184,7 +188,7 @@ static inline void native_lock_hpte(struct hash_pte *hptep)
 
 static inline void native_unlock_hpte(struct hash_pte *hptep)
 {
-	unsigned long *word = &hptep->v;
+	unsigned long *word = (unsigned long *)&hptep->v;
 
 	clear_bit_unlock(HPTE_LOCK_BIT, word);
 }
@@ -204,10 +208,10 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
 	}
 
 	for (i = 0; i < HPTES_PER_GROUP; i++) {
-		if (! (hptep->v & HPTE_V_VALID)) {
+		if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID)) {
 			/* retry with lock held */
 			native_lock_hpte(hptep);
-			if (! (hptep->v & HPTE_V_VALID))
+			if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID))
 				break;
 			native_unlock_hpte(hptep);
 		}
@@ -226,14 +230,14 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
 			i, hpte_v, hpte_r);
 	}
 
-	hptep->r = hpte_r;
+	hptep->r = cpu_to_be64(hpte_r);
 	/* Guarantee the second dword is visible before the valid bit */
 	eieio();
 	/*
 	 * Now set the first dword including the valid bit
 	 * NOTE: this also unlocks the hpte
 	 */
-	hptep->v = hpte_v;
+	hptep->v = cpu_to_be64(hpte_v);
 
 	__asm__ __volatile__ ("ptesync" : : : "memory");
 
@@ -254,12 +258,12 @@ static long native_hpte_remove(unsigned long hpte_group)
 
 	for (i = 0; i < HPTES_PER_GROUP; i++) {
 		hptep = htab_address + hpte_group + slot_offset;
-		hpte_v = hptep->v;
+		hpte_v = be64_to_cpu(hptep->v);
 
 		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
 			/* retry with lock held */
 			native_lock_hpte(hptep);
-			hpte_v = hptep->v;
+			hpte_v = be64_to_cpu(hptep->v);
 			if ((hpte_v & HPTE_V_VALID)
 			    && !(hpte_v & HPTE_V_BOLTED))
 				break;
@@ -294,7 +298,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 
 	native_lock_hpte(hptep);
 
-	hpte_v = hptep->v;
+	hpte_v = be64_to_cpu(hptep->v);
 	/*
 	 * We need to invalidate the TLB always because hpte_remove doesn't do
 	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
@@ -308,8 +312,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 	} else {
 		DBG_LOW(" -> hit\n");
 		/* Update the HPTE */
-		hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
-			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C));
+		hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) |
+			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)));
 	}
 	native_unlock_hpte(hptep);
 
@@ -334,7 +338,7 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
 	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 	for (i = 0; i < HPTES_PER_GROUP; i++) {
 		hptep = htab_address + slot;
-		hpte_v = hptep->v;
+		hpte_v = be64_to_cpu(hptep->v);
 
 		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
 			/* HPTE matches */
@@ -369,8 +373,9 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
 	hptep = htab_address + slot;
 
 	/* Update the HPTE */
-	hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
-		(newpp & (HPTE_R_PP | HPTE_R_N));
+	hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
+			~(HPTE_R_PP | HPTE_R_N)) |
+		(newpp & (HPTE_R_PP | HPTE_R_N)));
 	/*
 	 * Ensure it is out of the tlb too. Bolted entries base and
 	 * actual page size will be same.
@@ -392,7 +397,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
 
 	want_v = hpte_encode_avpn(vpn, bpsize, ssize);
 	native_lock_hpte(hptep);
-	hpte_v = hptep->v;
+	hpte_v = be64_to_cpu(hptep->v);
 
 	/*
 	 * We need to invalidate the TLB always because hpte_remove doesn't do
@@ -458,7 +463,7 @@ static void native_hugepage_invalidate(struct mm_struct *mm,
 		hptep = htab_address + slot;
 		want_v = hpte_encode_avpn(vpn, psize, ssize);
 		native_lock_hpte(hptep);
-		hpte_v = hptep->v;
+		hpte_v = be64_to_cpu(hptep->v);
 
 		/* Even if we miss, we need to invalidate the TLB */
 		if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
@@ -519,11 +524,12 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			int *psize, int *apsize, int *ssize, unsigned long *vpn)
 {
 	unsigned long avpn, pteg, vpi;
-	unsigned long hpte_v = hpte->v;
+	unsigned long hpte_v = be64_to_cpu(hpte->v);
+	unsigned long hpte_r = be64_to_cpu(hpte->r);
 	unsigned long vsid, seg_off;
 	int size, a_size, shift;
 	/* Look at the 8 bit LP value */
-	unsigned int lp = (hpte->r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+	unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
 
 	if (!(hpte_v & HPTE_V_LARGE)) {
 		size   = MMU_PAGE_4K;
@@ -612,7 +618,7 @@ static void native_hpte_clear(void)
 		 * running,  right?  and for crash dump, we probably
 		 * don't want to wait for a maybe bad cpu.
 		 */
-		hpte_v = hptep->v;
+		hpte_v = be64_to_cpu(hptep->v);
 
 		/*
 		 * Call __tlbie() here rather than tlbie() since we
@@ -664,7 +670,7 @@ static void native_flush_hash_range(unsigned long number, int local)
 			hptep = htab_address + slot;
 			want_v = hpte_encode_avpn(vpn, psize, ssize);
 			native_lock_hpte(hptep);
-			hpte_v = hptep->v;
+			hpte_v = be64_to_cpu(hptep->v);
 			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
 			    !(hpte_v & HPTE_V_VALID))
 				native_unlock_hpte(hptep);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index bde8b5589755..6176b3cdf579 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -251,19 +251,18 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,
 					 void *data)
 {
 	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	u32 *prop;
+	__be32 *prop;
 	unsigned long size = 0;
 
 	/* We are scanning "cpu" nodes only */
 	if (type == NULL || strcmp(type, "cpu") != 0)
 		return 0;
 
-	prop = (u32 *)of_get_flat_dt_prop(node, "ibm,processor-segment-sizes",
-					  &size);
+	prop = of_get_flat_dt_prop(node, "ibm,processor-segment-sizes", &size);
 	if (prop == NULL)
 		return 0;
 	for (; size >= 4; size -= 4, ++prop) {
-		if (prop[0] == 40) {
+		if (be32_to_cpu(prop[0]) == 40) {
 			DBG("1T segment support detected\n");
 			cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
 			return 1;
@@ -307,23 +306,22 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 					  void *data)
 {
 	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	u32 *prop;
+	__be32 *prop;
 	unsigned long size = 0;
 
 	/* We are scanning "cpu" nodes only */
 	if (type == NULL || strcmp(type, "cpu") != 0)
 		return 0;
 
-	prop = (u32 *)of_get_flat_dt_prop(node,
-					  "ibm,segment-page-sizes", &size);
+	prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size);
 	if (prop != NULL) {
 		pr_info("Page sizes from device-tree:\n");
 		size /= 4;
 		cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
 		while(size > 0) {
-			unsigned int base_shift = prop[0];
-			unsigned int slbenc = prop[1];
-			unsigned int lpnum = prop[2];
+			unsigned int base_shift = be32_to_cpu(prop[0]);
+			unsigned int slbenc = be32_to_cpu(prop[1]);
+			unsigned int lpnum = be32_to_cpu(prop[2]);
 			struct mmu_psize_def *def;
 			int idx, base_idx;
 
@@ -356,8 +354,8 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 				def->tlbiel = 0;
 
 			while (size > 0 && lpnum) {
-				unsigned int shift = prop[0];
-				int penc  = prop[1];
+				unsigned int shift = be32_to_cpu(prop[0]);
+				int penc  = be32_to_cpu(prop[1]);
 
 				prop += 2; size -= 2;
 				lpnum--;
@@ -390,8 +388,8 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
 					const char *uname, int depth,
 					void *data) {
 	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	unsigned long *addr_prop;
-	u32 *page_count_prop;
+	__be64 *addr_prop;
+	__be32 *page_count_prop;
 	unsigned int expected_pages;
 	long unsigned int phys_addr;
 	long unsigned int block_size;
@@ -405,12 +403,12 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
 	page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL);
 	if (page_count_prop == NULL)
 		return 0;
-	expected_pages = (1 << page_count_prop[0]);
+	expected_pages = (1 << be32_to_cpu(page_count_prop[0]));
 	addr_prop = of_get_flat_dt_prop(node, "reg", NULL);
 	if (addr_prop == NULL)
 		return 0;
-	phys_addr = addr_prop[0];
-	block_size = addr_prop[1];
+	phys_addr = be64_to_cpu(addr_prop[0]);
+	block_size = be64_to_cpu(addr_prop[1]);
 	if (block_size != (16 * GB))
 		return 0;
 	printk(KERN_INFO "Huge page(16GB) memory: "
@@ -534,16 +532,16 @@ static int __init htab_dt_scan_pftsize(unsigned long node,
 				       void *data)
 {
 	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	u32 *prop;
+	__be32 *prop;
 
 	/* We are scanning "cpu" nodes only */
 	if (type == NULL || strcmp(type, "cpu") != 0)
 		return 0;
 
-	prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
+	prop = of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
 	if (prop != NULL) {
 		/* pft_size[0] is the NUMA CEC cookie */
-		ppc64_pft_size = prop[1];
+		ppc64_pft_size = be32_to_cpu(prop[1]);
 		return 1;
 	}
 	return 0;
-- 
cgit v1.2.3


From e156bd8ad76939a9bcd66d85cf06f8cde1fb8030 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 23 Sep 2013 12:04:37 +1000
Subject: powerpc: Fix offset of FPRs in VSX registers in little endian builds

The FPRs overlap the high doublewords of the first 32 VSX registers.
Fix TS_FPROFFSET and TS_VSRLOWOFFSET so we access the correct fields
in little endian mode.

If VSX is disabled the FPRs are only one doubleword in length, so
TS_FPROFFSET must stay 0 regardless of endianness.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/processor.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index ce4de5aed7b5..82c6ee9df9a1 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -14,8 +14,18 @@
 
 #ifdef CONFIG_VSX
 #define TS_FPRWIDTH 2
+
+#ifdef __BIG_ENDIAN__
+#define TS_FPROFFSET 0
+#define TS_VSRLOWOFFSET 1
+#else
+#define TS_FPROFFSET 1
+#define TS_VSRLOWOFFSET 0
+#endif
+
 #else
 #define TS_FPRWIDTH 1
+#define TS_FPROFFSET 0
 #endif
 
 #ifdef CONFIG_PPC64
@@ -142,8 +152,6 @@ typedef struct {
 	unsigned long seg;
 } mm_segment_t;
 
-#define TS_FPROFFSET 0
-#define TS_VSRLOWOFFSET 1
 #define TS_FPR(i) fpr[i][TS_FPROFFSET]
 #define TS_TRANS_FPR(i) transact_fpr[i][TS_FPROFFSET]
 
-- 
cgit v1.2.3


From 926f160f460170b0361f600f365369685ad74009 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 23 Sep 2013 12:04:39 +1000
Subject: powerpc: Little endian builds double word swap VSX state during
 context save/restore

The elements within VSX loads and stores are big endian ordered
regardless of endianness. Our VSX context save/restore code uses
lxvd2x and stxvd2x, which are 2x doubleword operations. In little
endian mode this means the two doublewords will be swapped and we
have to perform another swap to undo it.

We need to do this on save and restore.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/ppc-opcode.h |  3 +++
 arch/powerpc/include/asm/ppc_asm.h    | 21 +++++++++++++++++----
 2 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index d7fe9f5b46d4..ad5fcf51b252 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -181,6 +181,7 @@
 #define PPC_INST_TLBIVAX		0x7c000624
 #define PPC_INST_TLBSRX_DOT		0x7c0006a5
 #define PPC_INST_XXLOR			0xf0000510
+#define PPC_INST_XXSWAPD		0xf0000250
 #define PPC_INST_XVCPSGNDP		0xf0000780
 #define PPC_INST_TRECHKPT		0x7c0007dd
 #define PPC_INST_TRECLAIM		0x7c00075d
@@ -344,6 +345,8 @@
 					       VSX_XX1((s), a, b))
 #define XXLOR(t, a, b)		stringify_in_c(.long PPC_INST_XXLOR | \
 					       VSX_XX3((t), a, b))
+#define XXSWAPD(t, a)		stringify_in_c(.long PPC_INST_XXSWAPD | \
+					       VSX_XX3((t), a, a))
 #define XVCPSGNDP(t, a, b)	stringify_in_c(.long (PPC_INST_XVCPSGNDP | \
 					       VSX_XX3((t), (a), (b))))
 
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 599545738af3..0c51fb4fd2aa 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -180,9 +180,20 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 #define REST_32VRS_TRANSACT(n,b,base)	REST_16VRS_TRANSACT(n,b,base);	\
 					REST_16VRS_TRANSACT(n+16,b,base)
 
+#ifdef __BIG_ENDIAN__
+#define STXVD2X_ROT(n,b,base)		STXVD2X(n,b,base)
+#define LXVD2X_ROT(n,b,base)		LXVD2X(n,b,base)
+#else
+#define STXVD2X_ROT(n,b,base)		XXSWAPD(n,n);		\
+					STXVD2X(n,b,base);	\
+					XXSWAPD(n,n)
+
+#define LXVD2X_ROT(n,b,base)		LXVD2X(n,b,base);	\
+					XXSWAPD(n,n)
+#endif
 
 #define SAVE_VSR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VSR0+(16*(n)); \
-					STXVD2X(n,R##base,R##b)
+					STXVD2X_ROT(n,R##base,R##b)
 #define SAVE_2VSRS_TRANSACT(n,b,base)	SAVE_VSR_TRANSACT(n,b,base);	\
 	                                SAVE_VSR_TRANSACT(n+1,b,base)
 #define SAVE_4VSRS_TRANSACT(n,b,base)	SAVE_2VSRS_TRANSACT(n,b,base);	\
@@ -195,7 +206,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 	                                SAVE_16VSRS_TRANSACT(n+16,b,base)
 
 #define REST_VSR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VSR0+(16*(n)); \
-					LXVD2X(n,R##base,R##b)
+					LXVD2X_ROT(n,R##base,R##b)
 #define REST_2VSRS_TRANSACT(n,b,base)	REST_VSR_TRANSACT(n,b,base);    \
 	                                REST_VSR_TRANSACT(n+1,b,base)
 #define REST_4VSRS_TRANSACT(n,b,base)	REST_2VSRS_TRANSACT(n,b,base);	\
@@ -208,13 +219,15 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 	                                REST_16VSRS_TRANSACT(n+16,b,base)
 
 /* Save the lower 32 VSRs in the thread VSR region */
-#define SAVE_VSR(n,b,base)	li b,THREAD_VSR0+(16*(n));  STXVD2X(n,R##base,R##b)
+#define SAVE_VSR(n,b,base)	li b,THREAD_VSR0+(16*(n)); \
+				STXVD2X_ROT(n,R##base,R##b)
 #define SAVE_2VSRS(n,b,base)	SAVE_VSR(n,b,base); SAVE_VSR(n+1,b,base)
 #define SAVE_4VSRS(n,b,base)	SAVE_2VSRS(n,b,base); SAVE_2VSRS(n+2,b,base)
 #define SAVE_8VSRS(n,b,base)	SAVE_4VSRS(n,b,base); SAVE_4VSRS(n+4,b,base)
 #define SAVE_16VSRS(n,b,base)	SAVE_8VSRS(n,b,base); SAVE_8VSRS(n+8,b,base)
 #define SAVE_32VSRS(n,b,base)	SAVE_16VSRS(n,b,base); SAVE_16VSRS(n+16,b,base)
-#define REST_VSR(n,b,base)	li b,THREAD_VSR0+(16*(n)); LXVD2X(n,R##base,R##b)
+#define REST_VSR(n,b,base)	li b,THREAD_VSR0+(16*(n)); \
+				LXVD2X_ROT(n,R##base,R##b)
 #define REST_2VSRS(n,b,base)	REST_VSR(n,b,base); REST_VSR(n+1,b,base)
 #define REST_4VSRS(n,b,base)	REST_2VSRS(n,b,base); REST_2VSRS(n+2,b,base)
 #define REST_8VSRS(n,b,base)	REST_4VSRS(n,b,base); REST_4VSRS(n+4,b,base)
-- 
cgit v1.2.3


From 15cba23e69435c86197d6e5035445469ca2b8484 Mon Sep 17 00:00:00 2001
From: Ian Munsie <imunsie@au1.ibm.com>
Date: Mon, 23 Sep 2013 12:04:40 +1000
Subject: powerpc: Support endian agnostic MMIO

This patch maps the MMIO functions for 32bit PowerPC to their
appropriate instructions depending on CPU endianness.

The macros used to create the corresponding inline functions are also
renamed by this patch. Previously they had BE or LE in their names which
was misleading - they had nothing to do with endianness, but actually
created different instruction forms so their new names reflect the
instruction form they are creating (D-Form and X-Form).

Little endian 64bit PowerPC is not supported, so the lack of mappings
(and corresponding breakage) for that case is intentional to bring the
attention of anyone doing a 64bit little endian port. 64bit big endian
is unaffected.

[ Added 64 bit versions - Anton ]

Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/io.h | 67 +++++++++++++++++++++++++++++++------------
 1 file changed, 49 insertions(+), 18 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 5a64757dc0d1..db1f29673c06 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -113,7 +113,7 @@ extern bool isa_io_special;
 
 /* gcc 4.0 and older doesn't have 'Z' constraint */
 #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ == 0)
-#define DEF_MMIO_IN_LE(name, size, insn)				\
+#define DEF_MMIO_IN_X(name, size, insn)				\
 static inline u##size name(const volatile u##size __iomem *addr)	\
 {									\
 	u##size ret;							\
@@ -122,7 +122,7 @@ static inline u##size name(const volatile u##size __iomem *addr)	\
 	return ret;							\
 }
 
-#define DEF_MMIO_OUT_LE(name, size, insn) 				\
+#define DEF_MMIO_OUT_X(name, size, insn)				\
 static inline void name(volatile u##size __iomem *addr, u##size val)	\
 {									\
 	__asm__ __volatile__("sync;"#insn" %1,0,%2"			\
@@ -130,7 +130,7 @@ static inline void name(volatile u##size __iomem *addr, u##size val)	\
 	IO_SET_SYNC_FLAG();						\
 }
 #else /* newer gcc */
-#define DEF_MMIO_IN_LE(name, size, insn)				\
+#define DEF_MMIO_IN_X(name, size, insn)				\
 static inline u##size name(const volatile u##size __iomem *addr)	\
 {									\
 	u##size ret;							\
@@ -139,7 +139,7 @@ static inline u##size name(const volatile u##size __iomem *addr)	\
 	return ret;							\
 }
 
-#define DEF_MMIO_OUT_LE(name, size, insn) 				\
+#define DEF_MMIO_OUT_X(name, size, insn)				\
 static inline void name(volatile u##size __iomem *addr, u##size val)	\
 {									\
 	__asm__ __volatile__("sync;"#insn" %1,%y0"			\
@@ -148,7 +148,7 @@ static inline void name(volatile u##size __iomem *addr, u##size val)	\
 }
 #endif
 
-#define DEF_MMIO_IN_BE(name, size, insn)				\
+#define DEF_MMIO_IN_D(name, size, insn)				\
 static inline u##size name(const volatile u##size __iomem *addr)	\
 {									\
 	u##size ret;							\
@@ -157,7 +157,7 @@ static inline u##size name(const volatile u##size __iomem *addr)	\
 	return ret;							\
 }
 
-#define DEF_MMIO_OUT_BE(name, size, insn)				\
+#define DEF_MMIO_OUT_D(name, size, insn)				\
 static inline void name(volatile u##size __iomem *addr, u##size val)	\
 {									\
 	__asm__ __volatile__("sync;"#insn"%U0%X0 %1,%0"			\
@@ -165,22 +165,37 @@ static inline void name(volatile u##size __iomem *addr, u##size val)	\
 	IO_SET_SYNC_FLAG();						\
 }
 
+DEF_MMIO_IN_D(in_8,     8, lbz);
+DEF_MMIO_OUT_D(out_8,   8, stb);
 
-DEF_MMIO_IN_BE(in_8,     8, lbz);
-DEF_MMIO_IN_BE(in_be16, 16, lhz);
-DEF_MMIO_IN_BE(in_be32, 32, lwz);
-DEF_MMIO_IN_LE(in_le16, 16, lhbrx);
-DEF_MMIO_IN_LE(in_le32, 32, lwbrx);
+#ifdef __BIG_ENDIAN__
+DEF_MMIO_IN_D(in_be16, 16, lhz);
+DEF_MMIO_IN_D(in_be32, 32, lwz);
+DEF_MMIO_IN_X(in_le16, 16, lhbrx);
+DEF_MMIO_IN_X(in_le32, 32, lwbrx);
 
-DEF_MMIO_OUT_BE(out_8,     8, stb);
-DEF_MMIO_OUT_BE(out_be16, 16, sth);
-DEF_MMIO_OUT_BE(out_be32, 32, stw);
-DEF_MMIO_OUT_LE(out_le16, 16, sthbrx);
-DEF_MMIO_OUT_LE(out_le32, 32, stwbrx);
+DEF_MMIO_OUT_D(out_be16, 16, sth);
+DEF_MMIO_OUT_D(out_be32, 32, stw);
+DEF_MMIO_OUT_X(out_le16, 16, sthbrx);
+DEF_MMIO_OUT_X(out_le32, 32, stwbrx);
+#else
+DEF_MMIO_IN_X(in_be16, 16, lhbrx);
+DEF_MMIO_IN_X(in_be32, 32, lwbrx);
+DEF_MMIO_IN_D(in_le16, 16, lhz);
+DEF_MMIO_IN_D(in_le32, 32, lwz);
+
+DEF_MMIO_OUT_X(out_be16, 16, sthbrx);
+DEF_MMIO_OUT_X(out_be32, 32, stwbrx);
+DEF_MMIO_OUT_D(out_le16, 16, sth);
+DEF_MMIO_OUT_D(out_le32, 32, stw);
+
+#endif /* __BIG_ENDIAN */
 
 #ifdef __powerpc64__
-DEF_MMIO_OUT_BE(out_be64, 64, std);
-DEF_MMIO_IN_BE(in_be64, 64, ld);
+
+#ifdef __BIG_ENDIAN__
+DEF_MMIO_OUT_D(out_be64, 64, std);
+DEF_MMIO_IN_D(in_be64, 64, ld);
 
 /* There is no asm instructions for 64 bits reverse loads and stores */
 static inline u64 in_le64(const volatile u64 __iomem *addr)
@@ -192,6 +207,22 @@ static inline void out_le64(volatile u64 __iomem *addr, u64 val)
 {
 	out_be64(addr, swab64(val));
 }
+#else
+DEF_MMIO_OUT_D(out_le64, 64, std);
+DEF_MMIO_IN_D(in_le64, 64, ld);
+
+/* There is no asm instructions for 64 bits reverse loads and stores */
+static inline u64 in_be64(const volatile u64 __iomem *addr)
+{
+	return swab64(in_le64(addr));
+}
+
+static inline void out_be64(volatile u64 __iomem *addr, u64 val)
+{
+	out_le64(addr, swab64(val));
+}
+
+#endif
 #endif /* __powerpc64__ */
 
 /*
-- 
cgit v1.2.3


From 4c74c330c2d84aec9f2b4e87827b08814c266b6b Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 23 Sep 2013 12:04:41 +1000
Subject: powerpc: Add little endian support for word-at-a-time functions

The powerpc word-at-a-time functions are big endian specific.
Bring in the x86 version in order to support little endian builds.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/word-at-a-time.h | 71 +++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
index d0b6d4ac6dda..213a5f2b0717 100644
--- a/arch/powerpc/include/asm/word-at-a-time.h
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -8,6 +8,8 @@
 #include <linux/kernel.h>
 #include <asm/asm-compat.h>
 
+#ifdef __BIG_ENDIAN__
+
 struct word_at_a_time {
 	const unsigned long high_bits, low_bits;
 };
@@ -38,4 +40,73 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct
 	return (val + c->high_bits) & ~rhs;
 }
 
+#else
+
+/*
+ * This is largely generic for little-endian machines, but the
+ * optimal byte mask counting is probably going to be something
+ * that is architecture-specific. If you have a reliably fast
+ * bit count instruction, that might be better than the multiply
+ * and shift, for example.
+ */
+struct word_at_a_time {
+	const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+
+#ifdef CONFIG_64BIT
+
+/*
+ * Jan Achrenius on G+: microoptimized version of
+ * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
+ * that works for the bytemasks without having to
+ * mask them first.
+ */
+static inline long count_masked_bytes(unsigned long mask)
+{
+	return mask*0x0001020304050608ul >> 56;
+}
+
+#else	/* 32-bit case */
+
+/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
+static inline long count_masked_bytes(long mask)
+{
+	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+	long a = (0x0ff0001+mask) >> 23;
+	/* Fix the 1 for 00 case */
+	return a & mask;
+}
+
+#endif
+
+/* Return nonzero if it has a zero */
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
+{
+	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
+	*bits = mask;
+	return mask;
+}
+
+static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
+{
+	return bits;
+}
+
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+	bits = (bits - 1) & ~bits;
+	return bits >> 7;
+}
+
+/* The mask we created is directly usable as a bytemask */
+#define zero_bytemask(mask) (mask)
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+	return count_masked_bytes(mask);
+}
+#endif
+
 #endif /* _ASM_WORD_AT_A_TIME_H */
-- 
cgit v1.2.3


From ef1967ff875d58c290108f9c6872a53a79855c4b Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 23 Sep 2013 12:04:42 +1000
Subject: powerpc: Set MSR_LE bit on little endian builds

We need to set MSR_LE in kernel and userspace for little endian builds.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/reg.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 10d1ef016bf1..126f6e98f84d 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -115,7 +115,12 @@
 #define MSR_64BIT	MSR_SF
 
 /* Server variant */
-#define MSR_		(MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV)
+#define __MSR		(MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV)
+#ifdef __BIG_ENDIAN__
+#define MSR_		__MSR
+#else
+#define MSR_		(__MSR | MSR_LE)
+#endif
 #define MSR_KERNEL	(MSR_ | MSR_64BIT)
 #define MSR_USER32	(MSR_ | MSR_PR | MSR_EE)
 #define MSR_USER64	(MSR_USER32 | MSR_64BIT)
-- 
cgit v1.2.3


From c57a5ce0df04be61bafedc0f3043d568103c7ab5 Mon Sep 17 00:00:00 2001
From: Ian Munsie <imunsie@au1.ibm.com>
Date: Mon, 23 Sep 2013 12:04:44 +1000
Subject: powerpc: Include the appropriate endianness header

This patch will have powerpc include the appropriate generic endianness
header depending on what the compiler reports.

Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/uapi/asm/byteorder.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/uapi/asm/byteorder.h b/arch/powerpc/include/uapi/asm/byteorder.h
index aa6cc4fac965..ca931d074000 100644
--- a/arch/powerpc/include/uapi/asm/byteorder.h
+++ b/arch/powerpc/include/uapi/asm/byteorder.h
@@ -7,6 +7,10 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+#ifdef __LITTLE_ENDIAN__
+#include <linux/byteorder/little_endian.h>
+#else
 #include <linux/byteorder/big_endian.h>
+#endif
 
 #endif /* _ASM_POWERPC_BYTEORDER_H */
-- 
cgit v1.2.3


From 5c0484e25ec03243d4c2f2d4416d4a13efc77f6a Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 23 Sep 2013 12:04:45 +1000
Subject: powerpc: Endian safe trampoline

Create a trampoline that works in either endianness and flips to
the expected one. Use it for primary and secondary thread entry
as well as on return from RTAS and OF calls.

Credit for finding the magic instruction goes to Paul Mackerras.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/ppc_asm.h | 31 ++++++++++++++++++++++++++++++-
 arch/powerpc/kernel/entry_64.S     | 36 ++++++++++++++++++++----------------
 arch/powerpc/kernel/head_64.S      |  3 +++
 3 files changed, 53 insertions(+), 17 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 0c51fb4fd2aa..ce05bba0bfc6 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -845,6 +845,35 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,946)
 #define N_SLINE	68
 #define N_SO	100
 
-#endif /*  __ASSEMBLY__ */
+/*
+ * Create an endian fixup trampoline
+ *
+ * This starts with a "tdi 0,0,0x48" instruction which is
+ * essentially a "trap never", and thus akin to a nop.
+ *
+ * The opcode for this instruction read with the wrong endian
+ * however results in a b . + 8
+ *
+ * So essentially we use that trick to execute the following
+ * trampoline in "reverse endian" if we are running with the
+ * MSR_LE bit set the "wrong" way for whatever endianness the
+ * kernel is built for.
+ */
 
+#ifdef CONFIG_PPC_BOOK3E
+#define FIXUP_ENDIAN
+#else
+#define FIXUP_ENDIAN						   \
+	tdi   0,0,0x48;	  /* Reverse endian of b . + 8		*/ \
+	b     $+36;	  /* Skip trampoline if endian is good	*/ \
+	.long 0x05009f42; /* bcl 20,31,$+4			*/ \
+	.long 0xa602487d; /* mflr r10				*/ \
+	.long 0x1c004a39; /* addi r10,r10,28			*/ \
+	.long 0xa600607d; /* mfmsr r11				*/ \
+	.long 0x01006b69; /* xori r11,r11,1			*/ \
+	.long 0xa6035a7d; /* mtsrr0 r10				*/ \
+	.long 0xa6037b7d; /* mtsrr1 r11				*/ \
+	.long 0x2400004c  /* rfid				*/
+#endif /* !CONFIG_PPC_BOOK3E */
+#endif /*  __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_PPC_ASM_H */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index c04cdf70d487..889ea2b19383 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -1017,7 +1017,7 @@ _GLOBAL(enter_rtas)
 	
         li      r9,1
         rldicr  r9,r9,MSR_SF_LG,(63-MSR_SF_LG)
-	ori	r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI
+	ori	r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE
 	andc	r6,r0,r9
 	sync				/* disable interrupts so SRR0/1 */
 	mtmsrd	r0			/* don't get trashed */
@@ -1032,6 +1032,8 @@ _GLOBAL(enter_rtas)
 	b	.	/* prevent speculative execution */
 
 _STATIC(rtas_return_loc)
+	FIXUP_ENDIAN
+
 	/* relocation is off at this point */
 	GET_PACA(r4)
 	clrldi	r4,r4,2			/* convert to realmode address */
@@ -1103,28 +1105,30 @@ _GLOBAL(enter_prom)
 	std	r10,_CCR(r1)
 	std	r11,_MSR(r1)
 
-	/* Get the PROM entrypoint */
-	mtlr	r4
+	/* Put PROM address in SRR0 */
+	mtsrr0	r4
+
+	/* Setup our trampoline return addr in LR */
+	bcl	20,31,$+4
+0:	mflr	r4
+	addi	r4,r4,(1f - 0b)
+       	mtlr	r4
 
-	/* Switch MSR to 32 bits mode
+	/* Prepare a 32-bit mode big endian MSR
 	 */
 #ifdef CONFIG_PPC_BOOK3E
 	rlwinm	r11,r11,0,1,31
-	mtmsr	r11
+	mtsrr1	r11
+	rfi
 #else /* CONFIG_PPC_BOOK3E */
-        mfmsr   r11
-        li      r12,1
-        rldicr  r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
-        andc    r11,r11,r12
-        li      r12,1
-        rldicr  r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
-        andc    r11,r11,r12
-        mtmsrd  r11
+	LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
+	andc	r11,r11,r12
+	mtsrr1	r11
+	rfid
 #endif /* CONFIG_PPC_BOOK3E */
-        isync
 
-	/* Enter PROM here... */
-	blrl
+1:	/* Return from OF */
+	FIXUP_ENDIAN
 
 	/* Just make sure that r1 top 32 bits didn't get
 	 * corrupt by OF
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 3d11d8038dee..2ae41aba4053 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -68,6 +68,7 @@ _stext:
 _GLOBAL(__start)
 	/* NOP this out unconditionally */
 BEGIN_FTR_SECTION
+	FIXUP_ENDIAN
 	b	.__start_initialization_multiplatform
 END_FTR_SECTION(0, 1)
 
@@ -115,6 +116,7 @@ __run_at_load:
  */
 	.globl	__secondary_hold
 __secondary_hold:
+	FIXUP_ENDIAN
 #ifndef CONFIG_PPC_BOOK3E
 	mfmsr	r24
 	ori	r24,r24,MSR_RI
@@ -205,6 +207,7 @@ _GLOBAL(generic_secondary_thread_init)
  * as SCOM before entry).
  */
 _GLOBAL(generic_secondary_smp_init)
+	FIXUP_ENDIAN
 	mr	r24,r3
 	mr	r25,r4
 
-- 
cgit v1.2.3


From 7a332b0c9a59e0b0777dec55eefdda0f9a24ac52 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 23 Sep 2013 12:04:51 +1000
Subject: powerpc: Use generic checksum code in little endian

We need to fix some endian issues in our checksum code. For now
just enable the generic checksum routines for little endian builds.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/Kconfig                | 3 +++
 arch/powerpc/include/asm/checksum.h | 5 +++++
 arch/powerpc/kernel/ppc_ksyms.c     | 2 ++
 arch/powerpc/lib/Makefile           | 9 +++++++--
 4 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 38f3b7e47ec5..2c69c43ba68e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -139,6 +139,9 @@ config PPC
 	select OLD_SIGACTION if PPC32
 	select HAVE_DEBUG_STACKOVERFLOW
 
+config GENERIC_CSUM
+	def_bool CPU_LITTLE_ENDIAN
+
 config EARLY_PRINTK
 	bool
 	default y
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index ce0c28495f9a..8251a3ba870f 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -14,6 +14,9 @@
  * which always checksum on 4 octet boundaries.  ihl is the number
  * of 32-bit words and is always >= 5.
  */
+#ifdef CONFIG_GENERIC_CSUM
+#include <asm-generic/checksum.h>
+#else
 extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
 
 /*
@@ -123,5 +126,7 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
 	return sum;
 #endif
 }
+
+#endif
 #endif /* __KERNEL__ */
 #endif
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 21646dbe1bb3..3b485c5cff10 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -79,10 +79,12 @@ EXPORT_SYMBOL(strlen);
 EXPORT_SYMBOL(strcmp);
 EXPORT_SYMBOL(strncmp);
 
+#ifndef CONFIG_GENERIC_CSUM
 EXPORT_SYMBOL(csum_partial);
 EXPORT_SYMBOL(csum_partial_copy_generic);
 EXPORT_SYMBOL(ip_fast_csum);
 EXPORT_SYMBOL(csum_tcpudp_magic);
+#endif
 
 EXPORT_SYMBOL(__copy_tofrom_user);
 EXPORT_SYMBOL(__clear_user);
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 450433276699..33ab26112123 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -10,15 +10,20 @@ CFLAGS_REMOVE_code-patching.o = -pg
 CFLAGS_REMOVE_feature-fixups.o = -pg
 
 obj-y			:= string.o alloc.o \
-			   checksum_$(CONFIG_WORD_SIZE).o crtsavres.o
+			   crtsavres.o
 obj-$(CONFIG_PPC32)	+= div64.o copy_32.o
 obj-$(CONFIG_HAS_IOMEM)	+= devres.o
 
 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
 			   memcpy_64.o usercopy_64.o mem_64.o string.o \
-			   checksum_wrappers_64.o hweight_64.o \
+			   hweight_64.o \
 			   copyuser_power7.o string_64.o copypage_power7.o \
 			   memcpy_power7.o
+ifeq ($(CONFIG_GENERIC_CSUM),)
+obj-y			+= checksum_$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_PPC64)	+= checksum_wrappers_64.o
+endif
+
 obj-$(CONFIG_PPC_EMULATE_SSTEP)	+= sstep.o ldstfp.o
 
 ifeq ($(CONFIG_PPC64),y)
-- 
cgit v1.2.3


From de577a356848a629b2c7f252ca3d1bc87375b52b Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 23 Sep 2013 12:04:52 +1000
Subject: powerpc: Use generic memcpy code in little endian

We need to fix some endian issues in our memcpy code. For now
just enable the generic memcpy routine for little endian builds.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/string.h | 4 ++++
 arch/powerpc/kernel/ppc_ksyms.c   | 2 ++
 arch/powerpc/lib/Makefile         | 9 ++++++---
 3 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index e40010abcaf1..0dffad6bcc84 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -10,7 +10,9 @@
 #define __HAVE_ARCH_STRNCMP
 #define __HAVE_ARCH_STRCAT
 #define __HAVE_ARCH_MEMSET
+#ifdef __BIG_ENDIAN__
 #define __HAVE_ARCH_MEMCPY
+#endif
 #define __HAVE_ARCH_MEMMOVE
 #define __HAVE_ARCH_MEMCMP
 #define __HAVE_ARCH_MEMCHR
@@ -22,7 +24,9 @@ extern int strcmp(const char *,const char *);
 extern int strncmp(const char *, const char *, __kernel_size_t);
 extern char * strcat(char *, const char *);
 extern void * memset(void *,int,__kernel_size_t);
+#ifdef __BIG_ENDIAN__
 extern void * memcpy(void *,const void *,__kernel_size_t);
+#endif
 extern void * memmove(void *,const void *,__kernel_size_t);
 extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 3b485c5cff10..60bbeb29f9b8 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -148,7 +148,9 @@ EXPORT_SYMBOL(__ucmpdi2);
 #endif
 long long __bswapdi2(long long);
 EXPORT_SYMBOL(__bswapdi2);
+#ifdef __BIG_ENDIAN__
 EXPORT_SYMBOL(memcpy);
+#endif
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memcmp);
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 33ab26112123..5310132856c1 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -15,15 +15,18 @@ obj-$(CONFIG_PPC32)	+= div64.o copy_32.o
 obj-$(CONFIG_HAS_IOMEM)	+= devres.o
 
 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
-			   memcpy_64.o usercopy_64.o mem_64.o string.o \
+			   usercopy_64.o mem_64.o string.o \
 			   hweight_64.o \
-			   copyuser_power7.o string_64.o copypage_power7.o \
-			   memcpy_power7.o
+			   copyuser_power7.o string_64.o copypage_power7.o
 ifeq ($(CONFIG_GENERIC_CSUM),)
 obj-y			+= checksum_$(CONFIG_WORD_SIZE).o
 obj-$(CONFIG_PPC64)	+= checksum_wrappers_64.o
 endif
 
+ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),)
+obj-$(CONFIG_PPC64)		+= memcpy_power7.o memcpy_64.o 
+endif
+
 obj-$(CONFIG_PPC_EMULATE_SSTEP)	+= sstep.o ldstfp.o
 
 ifeq ($(CONFIG_PPC64),y)
-- 
cgit v1.2.3


From 4f89363b1187ca0cc36ee6aebe0bee550f74288d Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 23 Sep 2013 12:05:02 +1000
Subject: powerpc/powernv: Fix endian issues in OPAL console and udbg backend

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/opal.h       |  8 ++++----
 arch/powerpc/platforms/powernv/opal.c | 28 ++++++++++++++++------------
 2 files changed, 20 insertions(+), 16 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index c5cd72833d6e..6622ea438f0e 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -537,12 +537,12 @@ typedef struct oppanel_line {
 } oppanel_line_t;
 
 /* API functions */
-int64_t opal_console_write(int64_t term_number, int64_t *length,
+int64_t opal_console_write(int64_t term_number, __be64 *length,
 			   const uint8_t *buffer);
-int64_t opal_console_read(int64_t term_number, int64_t *length,
+int64_t opal_console_read(int64_t term_number, __be64 *length,
 			  uint8_t *buffer);
 int64_t opal_console_write_buffer_space(int64_t term_number,
-					int64_t *length);
+					__be64 *length);
 int64_t opal_rtc_read(uint32_t *year_month_day,
 		      uint64_t *hour_minute_second_millisecond);
 int64_t opal_rtc_write(uint32_t year_month_day,
@@ -552,7 +552,7 @@ int64_t opal_cec_reboot(void);
 int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
 int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
 int64_t opal_handle_interrupt(uint64_t isn, uint64_t *outstanding_event_mask);
-int64_t opal_poll_events(uint64_t *outstanding_event_mask);
+int64_t opal_poll_events(__be64 *outstanding_event_mask);
 int64_t opal_pci_set_hub_tce_memory(uint64_t hub_id, uint64_t tce_mem_addr,
 				    uint64_t tce_mem_size);
 int64_t opal_pci_set_phb_tce_memory(uint64_t phb_id, uint64_t tce_mem_addr,
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 4ffa75ef284b..eb7bf3bf604b 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -164,27 +164,28 @@ void opal_notifier_disable(void)
 
 int opal_get_chars(uint32_t vtermno, char *buf, int count)
 {
-	s64 len, rc;
-	u64 evt;
+	s64 rc;
+	__be64 evt, len;
 
 	if (!opal.entry)
 		return -ENODEV;
 	opal_poll_events(&evt);
-	if ((evt & OPAL_EVENT_CONSOLE_INPUT) == 0)
+	if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
 		return 0;
-	len = count;
-	rc = opal_console_read(vtermno, &len, buf);
+	len = cpu_to_be64(count);
+	rc = opal_console_read(vtermno, &len, buf);	
 	if (rc == OPAL_SUCCESS)
-		return len;
+		return be64_to_cpu(len);
 	return 0;
 }
 
 int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
 {
 	int written = 0;
+	__be64 olen;
 	s64 len, rc;
 	unsigned long flags;
-	u64 evt;
+	__be64 evt;
 
 	if (!opal.entry)
 		return -ENODEV;
@@ -199,13 +200,14 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
 	 */
 	spin_lock_irqsave(&opal_write_lock, flags);
 	if (firmware_has_feature(FW_FEATURE_OPALv2)) {
-		rc = opal_console_write_buffer_space(vtermno, &len);
+		rc = opal_console_write_buffer_space(vtermno, &olen);
+		len = be64_to_cpu(olen);
 		if (rc || len < total_len) {
 			spin_unlock_irqrestore(&opal_write_lock, flags);
 			/* Closed -> drop characters */
 			if (rc)
 				return total_len;
-			opal_poll_events(&evt);
+			opal_poll_events(NULL);
 			return -EAGAIN;
 		}
 	}
@@ -216,8 +218,9 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
 	rc = OPAL_BUSY;
 	while(total_len > 0 && (rc == OPAL_BUSY ||
 				rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
-		len = total_len;
-		rc = opal_console_write(vtermno, &len, data);
+		olen = cpu_to_be64(total_len);
+		rc = opal_console_write(vtermno, &olen, data);
+		len = be64_to_cpu(olen);
 
 		/* Closed or other error drop */
 		if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
@@ -237,7 +240,8 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
 		 */
 		do
 			opal_poll_events(&evt);
-		while(rc == OPAL_SUCCESS && (evt & OPAL_EVENT_CONSOLE_OUTPUT));
+		while(rc == OPAL_SUCCESS &&
+			(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
 	}
 	spin_unlock_irqrestore(&opal_write_lock, flags);
 	return written;
-- 
cgit v1.2.3


From 6feff6d4a5e1ac8c48d88860bf705be7709b42af Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 23 Sep 2013 12:05:05 +1000
Subject: powerpc/powernv: More little endian issues in OPAL RTC driver

Sparse caught an issue where opal_set_rtc_time was incorrectly
byteswapping. Also fix a number of sparse warnings.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/opal.h           |  4 ++--
 arch/powerpc/platforms/powernv/opal-rtc.c | 19 ++++++++++---------
 2 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 6622ea438f0e..3db5e82ee942 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -543,8 +543,8 @@ int64_t opal_console_read(int64_t term_number, __be64 *length,
 			  uint8_t *buffer);
 int64_t opal_console_write_buffer_space(int64_t term_number,
 					__be64 *length);
-int64_t opal_rtc_read(uint32_t *year_month_day,
-		      uint64_t *hour_minute_second_millisecond);
+int64_t opal_rtc_read(__be32 *year_month_day,
+		      __be64 *hour_minute_second_millisecond);
 int64_t opal_rtc_write(uint32_t year_month_day,
 		       uint64_t hour_minute_second_millisecond);
 int64_t opal_cec_power_down(uint64_t request);
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
index dbfdba35fcf3..7d07c7e80ec0 100644
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -37,10 +37,12 @@ unsigned long __init opal_get_boot_time(void)
 	struct rtc_time tm;
 	u32 y_m_d;
 	u64 h_m_s_ms;
+	__be32 __y_m_d;
+	__be64 __h_m_s_ms;
 	long rc = OPAL_BUSY;
 
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
-		rc = opal_rtc_read(&y_m_d, &h_m_s_ms);
+		rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
 		if (rc == OPAL_BUSY_EVENT)
 			opal_poll_events(NULL);
 		else
@@ -48,8 +50,8 @@ unsigned long __init opal_get_boot_time(void)
 	}
 	if (rc != OPAL_SUCCESS)
 		return 0;
-	y_m_d = be32_to_cpu(y_m_d);
-	h_m_s_ms = be64_to_cpu(h_m_s_ms);
+	y_m_d = be32_to_cpu(__y_m_d);
+	h_m_s_ms = be64_to_cpu(__h_m_s_ms);
 	opal_to_tm(y_m_d, h_m_s_ms, &tm);
 	return mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
 		      tm.tm_hour, tm.tm_min, tm.tm_sec);
@@ -60,9 +62,11 @@ void opal_get_rtc_time(struct rtc_time *tm)
 	long rc = OPAL_BUSY;
 	u32 y_m_d;
 	u64 h_m_s_ms;
+	__be32 __y_m_d;
+	__be64 __h_m_s_ms;
 
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
-		rc = opal_rtc_read(&y_m_d, &h_m_s_ms);
+		rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
 		if (rc == OPAL_BUSY_EVENT)
 			opal_poll_events(NULL);
 		else
@@ -70,8 +74,8 @@ void opal_get_rtc_time(struct rtc_time *tm)
 	}
 	if (rc != OPAL_SUCCESS)
 		return;
-	y_m_d = be32_to_cpu(y_m_d);
-	h_m_s_ms = be64_to_cpu(h_m_s_ms);
+	y_m_d = be32_to_cpu(__y_m_d);
+	h_m_s_ms = be64_to_cpu(__h_m_s_ms);
 	opal_to_tm(y_m_d, h_m_s_ms, tm);
 }
 
@@ -90,9 +94,6 @@ int opal_set_rtc_time(struct rtc_time *tm)
 	h_m_s_ms |= ((u64)bin2bcd(tm->tm_min)) << 48;
 	h_m_s_ms |= ((u64)bin2bcd(tm->tm_sec)) << 40;
 
-	y_m_d = cpu_to_be32(y_m_d);
-	h_m_s_ms = cpu_to_be64(h_m_s_ms);
-
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 		rc = opal_rtc_write(y_m_d, h_m_s_ms);
 		if (rc == OPAL_BUSY_EVENT)
-- 
cgit v1.2.3


From 5e4da530a5348e53bbb9f6f7f73c9afc67ed6c35 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 23 Sep 2013 12:05:06 +1000
Subject: powerpc/powernv: Fix some PCI sparse errors and one LE bug

pnv_pci_setup_bml_iommu was missing a byteswap of a device
tree property.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/opal.h           | 22 +++++++++++-----------
 arch/powerpc/platforms/powernv/opal.c     |  2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c |  6 +++---
 arch/powerpc/platforms/powernv/pci.c      |  6 +++---
 arch/powerpc/platforms/powernv/pci.h      |  2 +-
 5 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 3db5e82ee942..51e3b265ec12 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -551,7 +551,7 @@ int64_t opal_cec_power_down(uint64_t request);
 int64_t opal_cec_reboot(void);
 int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
 int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
-int64_t opal_handle_interrupt(uint64_t isn, uint64_t *outstanding_event_mask);
+int64_t opal_handle_interrupt(uint64_t isn, __be64 *outstanding_event_mask);
 int64_t opal_poll_events(__be64 *outstanding_event_mask);
 int64_t opal_pci_set_hub_tce_memory(uint64_t hub_id, uint64_t tce_mem_addr,
 				    uint64_t tce_mem_size);
@@ -560,9 +560,9 @@ int64_t opal_pci_set_phb_tce_memory(uint64_t phb_id, uint64_t tce_mem_addr,
 int64_t opal_pci_config_read_byte(uint64_t phb_id, uint64_t bus_dev_func,
 				  uint64_t offset, uint8_t *data);
 int64_t opal_pci_config_read_half_word(uint64_t phb_id, uint64_t bus_dev_func,
-				       uint64_t offset, uint16_t *data);
+				       uint64_t offset, __be16 *data);
 int64_t opal_pci_config_read_word(uint64_t phb_id, uint64_t bus_dev_func,
-				  uint64_t offset, uint32_t *data);
+				  uint64_t offset, __be32 *data);
 int64_t opal_pci_config_write_byte(uint64_t phb_id, uint64_t bus_dev_func,
 				   uint64_t offset, uint8_t data);
 int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func,
@@ -570,14 +570,14 @@ int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func,
 int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func,
 				   uint64_t offset, uint32_t data);
 int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
-int64_t opal_get_xive(uint32_t isn, uint16_t *server, uint8_t *priority);
+int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority);
 int64_t opal_register_exception_handler(uint64_t opal_exception,
 					uint64_t handler_address,
 					uint64_t glue_cache_line);
 int64_t opal_pci_eeh_freeze_status(uint64_t phb_id, uint64_t pe_number,
 				   uint8_t *freeze_state,
-				   uint16_t *pci_error_type,
-				   uint64_t *phb_status);
+				   __be16 *pci_error_type,
+				   __be64 *phb_status);
 int64_t opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number,
 				  uint64_t eeh_action_token);
 int64_t opal_pci_shpc(uint64_t phb_id, uint64_t shpc_action, uint8_t *state);
@@ -614,13 +614,13 @@ int64_t opal_pci_msi_eoi(uint64_t phb_id, uint32_t hw_irq);
 int64_t opal_pci_set_xive_pe(uint64_t phb_id, uint32_t pe_number,
 			     uint32_t xive_num);
 int64_t opal_get_xive_source(uint64_t phb_id, uint32_t xive_num,
-			     int32_t *interrupt_source_number);
+			     __be32 *interrupt_source_number);
 int64_t opal_get_msi_32(uint64_t phb_id, uint32_t mve_number, uint32_t xive_num,
-			uint8_t msi_range, uint32_t *msi_address,
-			uint32_t *message_data);
+			uint8_t msi_range, __be32 *msi_address,
+			__be32 *message_data);
 int64_t opal_get_msi_64(uint64_t phb_id, uint32_t mve_number,
 			uint32_t xive_num, uint8_t msi_range,
-			uint64_t *msi_address, uint32_t *message_data);
+			__be64 *msi_address, __be32 *message_data);
 int64_t opal_start_cpu(uint64_t thread_number, uint64_t start_address);
 int64_t opal_query_cpu_status(uint64_t thread_number, uint8_t *thread_status);
 int64_t opal_write_oppanel(oppanel_line_t *lines, uint64_t num_lines);
@@ -642,7 +642,7 @@ int64_t opal_pci_fence_phb(uint64_t phb_id);
 int64_t opal_pci_reinit(uint64_t phb_id, uint8_t reinit_scope);
 int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t error_type, uint8_t mask_action);
 int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action);
-int64_t opal_get_epow_status(uint64_t *status);
+int64_t opal_get_epow_status(__be64 *status);
 int64_t opal_set_system_attention_led(uint8_t led_action);
 int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe,
 			    uint16_t *pci_error_type, uint16_t *severity);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index c2391bbdab12..09336f0c54c5 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -366,7 +366,7 @@ int opal_machine_check(struct pt_regs *regs)
 
 static irqreturn_t opal_interrupt(int irq, void *data)
 {
-	uint64_t events;
+	__be64 events;
 
 	opal_handle_interrupt(virq_to_hw(irq), &events);
 
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index f9cb6c53a0ca..a6531d2ff6c2 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -455,7 +455,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
 }
 
 static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
-					 u64 *startp, u64 *endp)
+					 __be64 *startp, __be64 *endp)
 {
 	__be64 __iomem *invalidate = (__be64 __iomem *)tbl->it_index;
 	unsigned long start, end, inc;
@@ -496,7 +496,7 @@ static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
 
 static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
 					 struct iommu_table *tbl,
-					 u64 *startp, u64 *endp)
+					 __be64 *startp, __be64 *endp)
 {
 	unsigned long start, end, inc;
 	__be64 __iomem *invalidate = (__be64 __iomem *)tbl->it_index;
@@ -521,7 +521,7 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
 }
 
 void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
-				 u64 *startp, u64 *endp)
+				 __be64 *startp, __be64 *endp)
 {
 	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
 					      tce32_table);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 2f73e0da2603..a26956c5f38c 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -412,7 +412,7 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
 	if (direction != DMA_TO_DEVICE)
 		proto_tce |= TCE_PCI_WRITE;
 
-	tces = tcep = ((u64 *)tbl->it_base) + index - tbl->it_offset;
+	tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
 	rpn = __pa(uaddr) >> TCE_SHIFT;
 
 	while (npages--)
@@ -432,7 +432,7 @@ static void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
 {
 	__be64 *tcep, *tces;
 
-	tces = tcep = ((u64 *)tbl->it_base) + index - tbl->it_offset;
+	tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
 
 	while (npages--)
 		*(tcep++) = cpu_to_be64(0);
@@ -484,7 +484,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose)
 	swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info",
 				 NULL);
 	if (swinvp) {
-		tbl->it_busno = swinvp[1];
+		tbl->it_busno = be64_to_cpu(swinvp[1]);
 		tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
 		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
 	}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index d633c64e05a1..dfe20104238e 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -193,6 +193,6 @@ extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
 extern void pnv_pci_init_ioda_hub(struct device_node *np);
 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
 extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
-					u64 *startp, u64 *endp);
+					__be64 *startp, __be64 *endp);
 
 #endif /* __POWERNV_PCI_H */
-- 
cgit v1.2.3


From 99fc1d91b8fc30c969b0a2d152c803413ecb8cef Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 23 Sep 2013 12:05:07 +1000
Subject: powerpc/hvsi: Fix endian issues in HVSI driver

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/hvsi.h | 16 ++++++++--------
 drivers/tty/hvc/hvsi_lib.c      | 25 ++++++++++++-------------
 2 files changed, 20 insertions(+), 21 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/hvsi.h b/arch/powerpc/include/asm/hvsi.h
index d3f64f361814..d4a5315718ca 100644
--- a/arch/powerpc/include/asm/hvsi.h
+++ b/arch/powerpc/include/asm/hvsi.h
@@ -25,7 +25,7 @@
 struct hvsi_header {
 	uint8_t  type;
 	uint8_t  len;
-	uint16_t seqno;
+	__be16 seqno;
 } __attribute__((packed));
 
 struct hvsi_data {
@@ -35,24 +35,24 @@ struct hvsi_data {
 
 struct hvsi_control {
 	struct hvsi_header hdr;
-	uint16_t verb;
+	__be16 verb;
 	/* optional depending on verb: */
-	uint32_t word;
-	uint32_t mask;
+	__be32 word;
+	__be32 mask;
 } __attribute__((packed));
 
 struct hvsi_query {
 	struct hvsi_header hdr;
-	uint16_t verb;
+	__be16 verb;
 } __attribute__((packed));
 
 struct hvsi_query_response {
 	struct hvsi_header hdr;
-	uint16_t verb;
-	uint16_t query_seqno;
+	__be16 verb;
+	__be16 query_seqno;
 	union {
 		uint8_t  version;
-		uint32_t mctrl_word;
+		__be32 mctrl_word;
 	} u;
 } __attribute__((packed));
 
diff --git a/drivers/tty/hvc/hvsi_lib.c b/drivers/tty/hvc/hvsi_lib.c
index ac2767100df5..347050ea414a 100644
--- a/drivers/tty/hvc/hvsi_lib.c
+++ b/drivers/tty/hvc/hvsi_lib.c
@@ -9,7 +9,7 @@
 
 static int hvsi_send_packet(struct hvsi_priv *pv, struct hvsi_header *packet)
 {
-	packet->seqno = atomic_inc_return(&pv->seqno);
+	packet->seqno = cpu_to_be16(atomic_inc_return(&pv->seqno));
 
 	/* Assumes that always succeeds, works in practice */
 	return pv->put_chars(pv->termno, (char *)packet, packet->len);
@@ -28,7 +28,7 @@ static void hvsi_start_handshake(struct hvsi_priv *pv)
 	/* Send version query */
 	q.hdr.type = VS_QUERY_PACKET_HEADER;
 	q.hdr.len = sizeof(struct hvsi_query);
-	q.verb = VSV_SEND_VERSION_NUMBER;
+	q.verb = cpu_to_be16(VSV_SEND_VERSION_NUMBER);
 	hvsi_send_packet(pv, &q.hdr);
 }
 
@@ -40,7 +40,7 @@ static int hvsi_send_close(struct hvsi_priv *pv)
 
 	ctrl.hdr.type = VS_CONTROL_PACKET_HEADER;
 	ctrl.hdr.len = sizeof(struct hvsi_control);
-	ctrl.verb = VSV_CLOSE_PROTOCOL;
+	ctrl.verb = cpu_to_be16(VSV_CLOSE_PROTOCOL);
 	return hvsi_send_packet(pv, &ctrl.hdr);
 }
 
@@ -69,14 +69,14 @@ static void hvsi_got_control(struct hvsi_priv *pv)
 {
 	struct hvsi_control *pkt = (struct hvsi_control *)pv->inbuf;
 
-	switch (pkt->verb) {
+	switch (be16_to_cpu(pkt->verb)) {
 	case VSV_CLOSE_PROTOCOL:
 		/* We restart the handshaking */
 		hvsi_start_handshake(pv);
 		break;
 	case VSV_MODEM_CTL_UPDATE:
 		/* Transition of carrier detect */
-		hvsi_cd_change(pv, pkt->word & HVSI_TSCD);
+		hvsi_cd_change(pv, be32_to_cpu(pkt->word) & HVSI_TSCD);
 		break;
 	}
 }
@@ -87,7 +87,7 @@ static void hvsi_got_query(struct hvsi_priv *pv)
 	struct hvsi_query_response r;
 
 	/* We only handle version queries */
-	if (pkt->verb != VSV_SEND_VERSION_NUMBER)
+	if (be16_to_cpu(pkt->verb) != VSV_SEND_VERSION_NUMBER)
 		return;
 
 	pr_devel("HVSI@%x: Got version query, sending response...\n",
@@ -96,7 +96,7 @@ static void hvsi_got_query(struct hvsi_priv *pv)
 	/* Send version response */
 	r.hdr.type = VS_QUERY_RESPONSE_PACKET_HEADER;
 	r.hdr.len = sizeof(struct hvsi_query_response);
-	r.verb = VSV_SEND_VERSION_NUMBER;
+	r.verb = cpu_to_be16(VSV_SEND_VERSION_NUMBER);
 	r.u.version = HVSI_VERSION;
 	r.query_seqno = pkt->hdr.seqno;
 	hvsi_send_packet(pv, &r.hdr);
@@ -112,7 +112,7 @@ static void hvsi_got_response(struct hvsi_priv *pv)
 
 	switch(r->verb) {
 	case VSV_SEND_MODEM_CTL_STATUS:
-		hvsi_cd_change(pv, r->u.mctrl_word & HVSI_TSCD);
+		hvsi_cd_change(pv, be32_to_cpu(r->u.mctrl_word) & HVSI_TSCD);
 		pv->mctrl_update = 1;
 		break;
 	}
@@ -265,8 +265,7 @@ int hvsilib_read_mctrl(struct hvsi_priv *pv)
 	pv->mctrl_update = 0;
 	q.hdr.type = VS_QUERY_PACKET_HEADER;
 	q.hdr.len = sizeof(struct hvsi_query);
-	q.hdr.seqno = atomic_inc_return(&pv->seqno);
-	q.verb = VSV_SEND_MODEM_CTL_STATUS;
+	q.verb = cpu_to_be16(VSV_SEND_MODEM_CTL_STATUS);
 	rc = hvsi_send_packet(pv, &q.hdr);
 	if (rc <= 0) {
 		pr_devel("HVSI@%x: Error %d...\n", pv->termno, rc);
@@ -304,9 +303,9 @@ int hvsilib_write_mctrl(struct hvsi_priv *pv, int dtr)
 
 	ctrl.hdr.type = VS_CONTROL_PACKET_HEADER,
 	ctrl.hdr.len = sizeof(struct hvsi_control);
-	ctrl.verb = VSV_SET_MODEM_CTL;
-	ctrl.mask = HVSI_TSDTR;
-	ctrl.word = dtr ? HVSI_TSDTR : 0;
+	ctrl.verb = cpu_to_be16(VSV_SET_MODEM_CTL);
+	ctrl.mask = cpu_to_be32(HVSI_TSDTR);
+	ctrl.word = cpu_to_be32(dtr ? HVSI_TSDTR : 0);
 	return hvsi_send_packet(pv, &ctrl.hdr);
 }
 
-- 
cgit v1.2.3


From a4da0d50b2a00b79390092e6248ca88b7d93c81d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Fri, 11 Oct 2013 14:07:57 +1100
Subject: powerpc: Implement arch_get_random_long/int() for powernv

Add the plumbing to implement arch_get_random_long/int(). It didn't seem
worth adding an extra ppc_md hook for int, so we reuse the one for long.

Add an implementation for powernv based on the hwrng found in power7+
systems. We whiten the output of the hwrng, and the result passes all
the dieharder tests.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/Kconfig                    |   3 +
 arch/powerpc/include/asm/archrandom.h   |  32 +++++++++
 arch/powerpc/include/asm/machdep.h      |   4 ++
 arch/powerpc/platforms/powernv/Kconfig  |   1 +
 arch/powerpc/platforms/powernv/Makefile |   2 +-
 arch/powerpc/platforms/powernv/rng.c    | 122 ++++++++++++++++++++++++++++++++
 6 files changed, 163 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/include/asm/archrandom.h
 create mode 100644 arch/powerpc/platforms/powernv/rng.c

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c5a868bc7aa4..875d815a8e7f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1012,6 +1012,9 @@ config PHYSICAL_START
 	default "0x00000000"
 endif
 
+config	ARCH_RANDOM
+	def_bool n
+
 source "net/Kconfig"
 
 source "drivers/Kconfig"
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
new file mode 100644
index 000000000000..d853d163ba47
--- /dev/null
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -0,0 +1,32 @@
+#ifndef _ASM_POWERPC_ARCHRANDOM_H
+#define _ASM_POWERPC_ARCHRANDOM_H
+
+#ifdef CONFIG_ARCH_RANDOM
+
+#include <asm/machdep.h>
+
+static inline int arch_get_random_long(unsigned long *v)
+{
+	if (ppc_md.get_random_long)
+		return ppc_md.get_random_long(v);
+
+	return 0;
+}
+
+static inline int arch_get_random_int(unsigned int *v)
+{
+	unsigned long val;
+	int rc;
+
+	rc = arch_get_random_long(&val);
+	if (rc)
+		*v = val;
+
+	return rc;
+}
+
+int powernv_get_random_long(unsigned long *v);
+
+#endif /* CONFIG_ARCH_RANDOM */
+
+#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 8b480901165a..ce6cc2a7b8b9 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -263,6 +263,10 @@ struct machdep_calls {
 	ssize_t (*cpu_probe)(const char *, size_t);
 	ssize_t (*cpu_release)(const char *, size_t);
 #endif
+
+#ifdef CONFIG_ARCH_RANDOM
+	int (*get_random_long)(unsigned long *v);
+#endif
 };
 
 extern void e500_idle(void);
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 6fae5eb99ea6..21084645c70a 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -9,6 +9,7 @@ config PPC_POWERNV
 	select EPAPR_BOOT
 	select PPC_INDIRECT_PIO
 	select PPC_UDBG_16550
+	select ARCH_RANDOM
 	default y
 
 config POWERNV_MSI
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 300c437d713c..6760a86d5bd0 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -1,5 +1,5 @@
 obj-y			+= setup.o opal-takeover.o opal-wrappers.o opal.o
-obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o
+obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o rng.o
 
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
new file mode 100644
index 000000000000..02db7d73cf8b
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt)	"powernv-rng: " fmt
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <asm/archrandom.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+
+
+struct powernv_rng {
+	void __iomem *regs;
+	unsigned long mask;
+};
+
+static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
+
+
+static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
+{
+	unsigned long parity;
+
+	/* Calculate the parity of the value */
+	asm ("popcntd %0,%1" : "=r" (parity) : "r" (val));
+
+	/* xor our value with the previous mask */
+	val ^= rng->mask;
+
+	/* update the mask based on the parity of this value */
+	rng->mask = (rng->mask << 1) | (parity & 1);
+
+	return val;
+}
+
+int powernv_get_random_long(unsigned long *v)
+{
+	struct powernv_rng *rng;
+
+	rng = get_cpu_var(powernv_rng);
+
+	*v = rng_whiten(rng, in_be64(rng->regs));
+
+	put_cpu_var(rng);
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(powernv_get_random_long);
+
+static __init void rng_init_per_cpu(struct powernv_rng *rng,
+				    struct device_node *dn)
+{
+	int chip_id, cpu;
+
+	chip_id = of_get_ibm_chip_id(dn);
+	if (chip_id == -1)
+		pr_warn("No ibm,chip-id found for %s.\n", dn->full_name);
+
+	for_each_possible_cpu(cpu) {
+		if (per_cpu(powernv_rng, cpu) == NULL ||
+		    cpu_to_chip_id(cpu) == chip_id) {
+			per_cpu(powernv_rng, cpu) = rng;
+		}
+	}
+}
+
+static __init int rng_create(struct device_node *dn)
+{
+	struct powernv_rng *rng;
+	unsigned long val;
+
+	rng = kzalloc(sizeof(*rng), GFP_KERNEL);
+	if (!rng)
+		return -ENOMEM;
+
+	rng->regs = of_iomap(dn, 0);
+	if (!rng->regs) {
+		kfree(rng);
+		return -ENXIO;
+	}
+
+	val = in_be64(rng->regs);
+	rng->mask = val;
+
+	rng_init_per_cpu(rng, dn);
+
+	pr_info_once("Registering arch random hook.\n");
+
+	ppc_md.get_random_long = powernv_get_random_long;
+
+	return 0;
+}
+
+static __init int rng_init(void)
+{
+	struct device_node *dn;
+	int rc;
+
+	for_each_compatible_node(dn, NULL, "ibm,power-rng") {
+		rc = rng_create(dn);
+		if (rc) {
+			pr_err("Failed creating rng for %s (%d).\n",
+				dn->full_name, rc);
+			continue;
+		}
+
+		/* Create devices for hwrng driver */
+		of_platform_device_create(dn, NULL, NULL);
+	}
+
+	return 0;
+}
+subsys_initcall(rng_init);
-- 
cgit v1.2.3


From ac237b65f56c9b80d7774c35ccce15a74d445621 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 29 Aug 2013 16:55:07 +1000
Subject: powerpc: Enable /dev/port when isa_io_special is set

isa_io_special is set when the platform provides a "special"
implementation of inX/outX, for example via a firmware interface.

Such a platform doesn't need an ISA bridge on PCI, and so /dev/port
should be made available even if one isn't present.

This makes the LPC bus IOs accessible via /dev/port on PowerNV Power8.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/io.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index db1f29673c06..575fbf81fad0 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -21,7 +21,7 @@ extern struct pci_dev *isa_bridge_pcidev;
 /*
  * has legacy ISA devices ?
  */
-#define arch_has_dev_port()	(isa_bridge_pcidev != NULL)
+#define arch_has_dev_port()	(isa_bridge_pcidev != NULL || isa_io_special)
 #endif
 
 #include <linux/device.h>
-- 
cgit v1.2.3


From aaa63093dd4c393391a3368e1c7305b0cc620571 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 29 Aug 2013 16:55:45 +1000
Subject: powerpc/scom: Change scom_read() and scom_write() to return errors

scom_read() now returns the value read via a pointer argument, and
both functions return an int error code.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/scom.h       | 23 +++++++++++++++++------
 arch/powerpc/platforms/wsp/scom_smp.c | 18 +++++++++++++-----
 arch/powerpc/platforms/wsp/scom_wsp.c | 12 ++++++++----
 arch/powerpc/platforms/wsp/wsp.c      | 13 +++++++------
 arch/powerpc/sysdev/scom.c            |  3 +--
 5 files changed, 46 insertions(+), 23 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/scom.h b/arch/powerpc/include/asm/scom.h
index 0cabfd7bc2d1..07dcdcfdaefc 100644
--- a/arch/powerpc/include/asm/scom.h
+++ b/arch/powerpc/include/asm/scom.h
@@ -54,8 +54,8 @@ struct scom_controller {
 	scom_map_t (*map)(struct device_node *ctrl_dev, u64 reg, u64 count);
 	void (*unmap)(scom_map_t map);
 
-	u64 (*read)(scom_map_t map, u32 reg);
-	void (*write)(scom_map_t map, u32 reg, u64 value);
+	int (*read)(scom_map_t map, u32 reg, u64 *value);
+	int (*write)(scom_map_t map, u32 reg, u64 value);
 };
 
 extern const struct scom_controller *scom_controller;
@@ -133,10 +133,18 @@ static inline void scom_unmap(scom_map_t map)
  * scom_read - Read a SCOM register
  * @map: Result of scom_map
  * @reg: Register index within that map
+ * @value: Updated with the value read
+ *
+ * Returns 0 (success) or a negative error code
  */
-static inline u64 scom_read(scom_map_t map, u32 reg)
+static inline int scom_read(scom_map_t map, u32 reg, u64 *value)
 {
-	return scom_controller->read(map, reg);
+	int rc;
+
+	rc = scom_controller->read(map, reg, value);
+	if (rc)
+		*value = 0xfffffffffffffffful;
+	return rc;
 }
 
 /**
@@ -144,12 +152,15 @@ static inline u64 scom_read(scom_map_t map, u32 reg)
  * @map: Result of scom_map
  * @reg: Register index within that map
  * @value: Value to write
+ *
+ * Returns 0 (success) or a negative error code
  */
-static inline void scom_write(scom_map_t map, u32 reg, u64 value)
+static inline int scom_write(scom_map_t map, u32 reg, u64 value)
 {
-	scom_controller->write(map, reg, value);
+	return scom_controller->write(map, reg, value);
 }
 
+
 #endif /* CONFIG_PPC_SCOM */
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/platforms/wsp/scom_smp.c b/arch/powerpc/platforms/wsp/scom_smp.c
index b56b70aeb497..268bc899c1f7 100644
--- a/arch/powerpc/platforms/wsp/scom_smp.c
+++ b/arch/powerpc/platforms/wsp/scom_smp.c
@@ -116,7 +116,14 @@ static int a2_scom_ram(scom_map_t scom, int thread, u32 insn, int extmask)
 
 	scom_write(scom, SCOM_RAMIC, cmd);
 
-	while (!((val = scom_read(scom, SCOM_RAMC)) & mask)) {
+	for (;;) {
+		if (scom_read(scom, SCOM_RAMC, &val) != 0) {
+			pr_err("SCOM error on instruction 0x%08x, thread %d\n",
+			       insn, thread);
+			return -1;
+		}
+		if (val & mask)
+			break;
 		pr_devel("Waiting on RAMC = 0x%llx\n", val);
 		if (++n == 3) {
 			pr_err("RAMC timeout on instruction 0x%08x, thread %d\n",
@@ -151,9 +158,7 @@ static int a2_scom_getgpr(scom_map_t scom, int thread, int gpr, int alt,
 	if (rc)
 		return rc;
 
-	*out_gpr = scom_read(scom, SCOM_RAMD);
-
-	return 0;
+	return scom_read(scom, SCOM_RAMD, out_gpr);
 }
 
 static int a2_scom_getspr(scom_map_t scom, int thread, int spr, u64 *out_spr)
@@ -353,7 +358,10 @@ int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, struct device_node *np)
 
 	pr_devel("Bringing up CPU%d using SCOM...\n", lcpu);
 
-	pccr0 = scom_read(scom, SCOM_PCCR0);
+	if (scom_read(scom, SCOM_PCCR0, &pccr0) != 0) {
+		printk(KERN_ERR "XSCOM failure readng PCCR0 on CPU%d\n", lcpu);
+		return -1;
+	}
 	scom_write(scom, SCOM_PCCR0, pccr0 | SCOM_PCCR0_ENABLE_DEBUG |
 				     SCOM_PCCR0_ENABLE_RAM);
 
diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c
index 4052e2259f30..54172c4a8a64 100644
--- a/arch/powerpc/platforms/wsp/scom_wsp.c
+++ b/arch/powerpc/platforms/wsp/scom_wsp.c
@@ -50,18 +50,22 @@ static void wsp_scom_unmap(scom_map_t map)
 	iounmap((void *)map);
 }
 
-static u64 wsp_scom_read(scom_map_t map, u32 reg)
+static int wsp_scom_read(scom_map_t map, u32 reg, u64 *value)
 {
 	u64 __iomem *addr = (u64 __iomem *)map;
 
-	return in_be64(addr + reg);
+	*value = in_be64(addr + reg);
+
+	return 0;
 }
 
-static void wsp_scom_write(scom_map_t map, u32 reg, u64 value)
+static int wsp_scom_write(scom_map_t map, u32 reg, u64 value)
 {
 	u64 __iomem *addr = (u64 __iomem *)map;
 
-	return out_be64(addr + reg, value);
+	out_be64(addr + reg, value);
+
+	return 0;
 }
 
 static const struct scom_controller wsp_scom_controller = {
diff --git a/arch/powerpc/platforms/wsp/wsp.c b/arch/powerpc/platforms/wsp/wsp.c
index d25cc96c21b8..ddb6efe88914 100644
--- a/arch/powerpc/platforms/wsp/wsp.c
+++ b/arch/powerpc/platforms/wsp/wsp.c
@@ -89,6 +89,7 @@ void wsp_halt(void)
 	struct device_node *dn;
 	struct device_node *mine;
 	struct device_node *me;
+	int rc;
 
 	me = of_get_cpu_node(smp_processor_id(), NULL);
 	mine = scom_find_parent(me);
@@ -101,15 +102,15 @@ void wsp_halt(void)
 
 		/* read-modify-write it so the HW probe does not get
 		 * confused */
-		val = scom_read(m, 0);
-		val |= 1;
-		scom_write(m, 0, val);
+		rc = scom_read(m, 0, &val);
+		if (rc == 0)
+			scom_write(m, 0, val | 1);
 		scom_unmap(m);
 	}
 	m = scom_map(mine, 0, 1);
-	val = scom_read(m, 0);
-	val |= 1;
-	scom_write(m, 0, val);
+	rc = scom_read(m, 0, &val);
+	if (rc == 0)
+		scom_write(m, 0, val | 1);
 	/* should never return */
 	scom_unmap(m);
 }
diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c
index 9193e12df695..10f1d9e56612 100644
--- a/arch/powerpc/sysdev/scom.c
+++ b/arch/powerpc/sysdev/scom.c
@@ -137,8 +137,7 @@ static int scom_val_get(void *data, u64 *val)
 	if (!scom_map_ok(ent->map))
 		return -EFAULT;
 
-	*val = scom_read(ent->map, 0);
-	return 0;
+	return scom_read(ent->map, 0, val);
 }
 DEFINE_SIMPLE_ATTRIBUTE(scom_val_fops, scom_val_get, scom_val_set,
 			"0x%llx\n");
-- 
cgit v1.2.3


From 8c6852e036daa512376de381a3b61547d90465d4 Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Fri, 6 Sep 2013 09:00:04 +0800
Subject: powerpc/eeh: Output PHB3 diag-data

This patch adds the function ioda_eeh_phb3_phb_diag() to dump PHB3
PHB diag-data. It is called when an informative error or a frozen
PE is detected on the specific PHB.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/opal.h           | 65 ++++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/eeh-ioda.c | 70 +++++++++++++++++++++++++++++++
 2 files changed, 135 insertions(+)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 51e3b265ec12..4cc33ba1edd3 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -460,10 +460,12 @@ enum {
 
 enum {
 	OPAL_PHB_ERROR_DATA_TYPE_P7IOC = 1,
+	OPAL_PHB_ERROR_DATA_TYPE_PHB3 = 2
 };
 
 enum {
 	OPAL_P7IOC_NUM_PEST_REGS = 128,
+	OPAL_PHB3_NUM_PEST_REGS = 256
 };
 
 struct OpalIoPhbErrorCommon {
@@ -531,6 +533,69 @@ struct OpalIoP7IOCPhbErrorData {
 	uint64_t pestB[OPAL_P7IOC_NUM_PEST_REGS];
 };
 
+struct OpalIoPhb3ErrorData {
+	struct OpalIoPhbErrorCommon common;
+
+	uint32_t brdgCtl;
+
+	/* PHB3 UTL regs */
+	uint32_t portStatusReg;
+	uint32_t rootCmplxStatus;
+	uint32_t busAgentStatus;
+
+	/* PHB3 cfg regs */
+	uint32_t deviceStatus;
+	uint32_t slotStatus;
+	uint32_t linkStatus;
+	uint32_t devCmdStatus;
+	uint32_t devSecStatus;
+
+	/* cfg AER regs */
+	uint32_t rootErrorStatus;
+	uint32_t uncorrErrorStatus;
+	uint32_t corrErrorStatus;
+	uint32_t tlpHdr1;
+	uint32_t tlpHdr2;
+	uint32_t tlpHdr3;
+	uint32_t tlpHdr4;
+	uint32_t sourceId;
+
+	uint32_t rsv3;
+
+	/* Record data about the call to allocate a buffer */
+	uint64_t errorClass;
+	uint64_t correlator;
+
+	uint64_t nFir;			/* 000 */
+	uint64_t nFirMask;		/* 003 */
+	uint64_t nFirWOF;		/* 008 */
+
+	/* PHB3 MMIO Error Regs */
+	uint64_t phbPlssr;		/* 120 */
+	uint64_t phbCsr;		/* 110 */
+	uint64_t lemFir;		/* C00 */
+	uint64_t lemErrorMask;		/* C18 */
+	uint64_t lemWOF;		/* C40 */
+	uint64_t phbErrorStatus;	/* C80 */
+	uint64_t phbFirstErrorStatus;	/* C88 */
+	uint64_t phbErrorLog0;		/* CC0 */
+	uint64_t phbErrorLog1;		/* CC8 */
+	uint64_t mmioErrorStatus;	/* D00 */
+	uint64_t mmioFirstErrorStatus;	/* D08 */
+	uint64_t mmioErrorLog0;		/* D40 */
+	uint64_t mmioErrorLog1;		/* D48 */
+	uint64_t dma0ErrorStatus;	/* D80 */
+	uint64_t dma0FirstErrorStatus;	/* D88 */
+	uint64_t dma0ErrorLog0;		/* DC0 */
+	uint64_t dma0ErrorLog1;		/* DC8 */
+	uint64_t dma1ErrorStatus;	/* E00 */
+	uint64_t dma1FirstErrorStatus;	/* E08 */
+	uint64_t dma1ErrorLog0;		/* E40 */
+	uint64_t dma1ErrorLog1;		/* E48 */
+	uint64_t pestA[OPAL_PHB3_NUM_PEST_REGS];
+	uint64_t pestB[OPAL_PHB3_NUM_PEST_REGS];
+};
+
 typedef struct oppanel_line {
 	const char * 	line;
 	uint64_t 	line_len;
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index f426f4c24fb6..02245cee7818 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -747,6 +747,73 @@ static void ioda_eeh_p7ioc_phb_diag(struct pci_controller *hose,
 	}
 }
 
+static void ioda_eeh_phb3_phb_diag(struct pci_controller *hose,
+				    struct OpalIoPhbErrorCommon *common)
+{
+	struct OpalIoPhb3ErrorData *data;
+	int i;
+
+	data = (struct OpalIoPhb3ErrorData*)common;
+	pr_info("PHB3 PHB#%x Diag-data (Version: %d)\n\n",
+		hose->global_number, common->version);
+
+	pr_info("  brdgCtl:              %08x\n", data->brdgCtl);
+
+	pr_info("  portStatusReg:        %08x\n", data->portStatusReg);
+	pr_info("  rootCmplxStatus:      %08x\n", data->rootCmplxStatus);
+	pr_info("  busAgentStatus:       %08x\n", data->busAgentStatus);
+
+	pr_info("  deviceStatus:         %08x\n", data->deviceStatus);
+	pr_info("  slotStatus:           %08x\n", data->slotStatus);
+	pr_info("  linkStatus:           %08x\n", data->linkStatus);
+	pr_info("  devCmdStatus:         %08x\n", data->devCmdStatus);
+	pr_info("  devSecStatus:         %08x\n", data->devSecStatus);
+
+	pr_info("  rootErrorStatus:      %08x\n", data->rootErrorStatus);
+	pr_info("  uncorrErrorStatus:    %08x\n", data->uncorrErrorStatus);
+	pr_info("  corrErrorStatus:      %08x\n", data->corrErrorStatus);
+	pr_info("  tlpHdr1:              %08x\n", data->tlpHdr1);
+	pr_info("  tlpHdr2:              %08x\n", data->tlpHdr2);
+	pr_info("  tlpHdr3:              %08x\n", data->tlpHdr3);
+	pr_info("  tlpHdr4:              %08x\n", data->tlpHdr4);
+	pr_info("  sourceId:             %08x\n", data->sourceId);
+	pr_info("  errorClass:           %016llx\n", data->errorClass);
+	pr_info("  correlator:           %016llx\n", data->correlator);
+	pr_info("  nFir:                 %016llx\n", data->nFir);
+	pr_info("  nFirMask:             %016llx\n", data->nFirMask);
+	pr_info("  nFirWOF:              %016llx\n", data->nFirWOF);
+	pr_info("  PhbPlssr:             %016llx\n", data->phbPlssr);
+	pr_info("  PhbCsr:               %016llx\n", data->phbCsr);
+	pr_info("  lemFir:               %016llx\n", data->lemFir);
+	pr_info("  lemErrorMask:         %016llx\n", data->lemErrorMask);
+	pr_info("  lemWOF:               %016llx\n", data->lemWOF);
+	pr_info("  phbErrorStatus:       %016llx\n", data->phbErrorStatus);
+	pr_info("  phbFirstErrorStatus:  %016llx\n", data->phbFirstErrorStatus);
+	pr_info("  phbErrorLog0:         %016llx\n", data->phbErrorLog0);
+	pr_info("  phbErrorLog1:         %016llx\n", data->phbErrorLog1);
+	pr_info("  mmioErrorStatus:      %016llx\n", data->mmioErrorStatus);
+	pr_info("  mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
+	pr_info("  mmioErrorLog0:        %016llx\n", data->mmioErrorLog0);
+	pr_info("  mmioErrorLog1:        %016llx\n", data->mmioErrorLog1);
+	pr_info("  dma0ErrorStatus:      %016llx\n", data->dma0ErrorStatus);
+	pr_info("  dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
+	pr_info("  dma0ErrorLog0:        %016llx\n", data->dma0ErrorLog0);
+	pr_info("  dma0ErrorLog1:        %016llx\n", data->dma0ErrorLog1);
+	pr_info("  dma1ErrorStatus:      %016llx\n", data->dma1ErrorStatus);
+	pr_info("  dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
+	pr_info("  dma1ErrorLog0:        %016llx\n", data->dma1ErrorLog0);
+	pr_info("  dma1ErrorLog1:        %016llx\n", data->dma1ErrorLog1);
+
+	for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
+		if ((data->pestA[i] >> 63) == 0 &&
+		    (data->pestB[i] >> 63) == 0)
+			continue;
+
+		pr_info("  PE[%3d] PESTA:        %016llx\n", i, data->pestA[i]);
+		pr_info("          PESTB:        %016llx\n", data->pestB[i]);
+	}
+}
+
 static void ioda_eeh_phb_diag(struct pci_controller *hose)
 {
 	struct pnv_phb *phb = hose->private_data;
@@ -765,6 +832,9 @@ static void ioda_eeh_phb_diag(struct pci_controller *hose)
 	case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
 		ioda_eeh_p7ioc_phb_diag(hose, common);
 		break;
+	case OPAL_PHB_ERROR_DATA_TYPE_PHB3:
+		ioda_eeh_phb3_phb_diag(hose, common);
+		break;
 	default:
 		pr_warning("%s: Unrecognized I/O chip %d\n",
 			   __func__, common->ioType);
-- 
cgit v1.2.3


From 8e0861fa3c4edfc2f30dd4cf4d58d3929f7c1b23 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Wed, 28 Aug 2013 18:37:42 +1000
Subject: powerpc: Prepare to support kernel handling of IOMMU map/unmap

The current VFIO-on-POWER implementation supports only user mode
driven mapping, i.e. QEMU is sending requests to map/unmap pages.
However this approach is really slow, so we want to move that to KVM.
Since H_PUT_TCE can be extremely performance sensitive (especially with
network adapters where each packet needs to be mapped/unmapped) we chose
to implement that as a "fast" hypercall directly in "real
mode" (processor still in the guest context but MMU off).

To be able to do that, we need to provide some facilities to
access the struct page count within that real mode environment as things
like the sparsemem vmemmap mappings aren't accessible.

This adds an API function realmode_pfn_to_page() to get page struct when
MMU is off.

This adds to MM a new function put_page_unless_one() which drops a page
reference only if the reference count is greater than 1. It is going to
be used when the MMU is off (for example, real mode on PPC64), where we
want to make sure that the page is never released in real mode as that
may crash the kernel in a horrible way.

CONFIG_SPARSEMEM_VMEMMAP and CONFIG_FLATMEM are supported.

Cc: linux-mm@kvack.org
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/pgtable-ppc64.h |  2 ++
 arch/powerpc/mm/init_64.c                | 51 +++++++++++++++++++++++++++++++-
 include/linux/mm.h                       | 14 +++++++++
 include/linux/page-flags.h               |  4 ++-
 4 files changed, 69 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index 46db09414a10..4a191c472867 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -394,6 +394,8 @@ static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
 	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
 }
 
+struct page *realmode_pfn_to_page(unsigned long pfn);
+
 static inline char *get_hpte_slot_array(pmd_t *pmdp)
 {
 	/*
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 8ed035d2edb5..e3734edffa69 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -304,5 +304,54 @@ void register_page_bootmem_memmap(unsigned long section_nr,
 				  struct page *start_page, unsigned long size)
 {
 }
-#endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
+/*
+ * We do not have access to the sparsemem vmemmap, so we fallback to
+ * walking the list of sparsemem blocks which we already maintain for
+ * the sake of crashdump. In the long run, we might want to maintain
+ * a tree if performance of that linear walk becomes a problem.
+ *
+ * realmode_pfn_to_page functions can fail due to:
+ * 1) As real sparsemem blocks do not lie in RAM contiguously (they
+ * are in virtual address space which is not available in the real mode),
+ * the requested page struct can be split between blocks so get_page/put_page
+ * may fail.
+ * 2) When huge pages are used, the get_page/put_page API will fail
+ * in real mode as the linked addresses in the page struct are virtual
+ * too.
+ */
+struct page *realmode_pfn_to_page(unsigned long pfn)
+{
+	struct vmemmap_backing *vmem_back;
+	struct page *page;
+	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
+	unsigned long pg_va = (unsigned long) pfn_to_page(pfn);
+
+	for (vmem_back = vmemmap_list; vmem_back; vmem_back = vmem_back->list) {
+		if (pg_va < vmem_back->virt_addr)
+			continue;
+
+		/* Check that page struct is not split between real pages */
+		if ((pg_va + sizeof(struct page)) >
+				(vmem_back->virt_addr + page_size))
+			return NULL;
+
+		page = (struct page *) (vmem_back->phys + pg_va -
+				vmem_back->virt_addr);
+		return page;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
+
+#elif defined(CONFIG_FLATMEM)
+
+struct page *realmode_pfn_to_page(unsigned long pfn)
+{
+	struct page *page = pfn_to_page(pfn);
+	return page;
+}
+EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
+
+#endif /* CONFIG_SPARSEMEM_VMEMMAP/CONFIG_FLATMEM */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8b6e55ee8855..1a0668e5a4ee 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -297,12 +297,26 @@ static inline int put_page_testzero(struct page *page)
 /*
  * Try to grab a ref unless the page has a refcount of zero, return false if
  * that is the case.
+ * This can be called when MMU is off so it must not access
+ * any of the virtual mappings.
  */
 static inline int get_page_unless_zero(struct page *page)
 {
 	return atomic_inc_not_zero(&page->_count);
 }
 
+/*
+ * Try to drop a ref unless the page has a refcount of one, return false if
+ * that is the case.
+ * This is to make sure that the refcount won't become zero after this drop.
+ * This can be called when MMU is off so it must not access
+ * any of the virtual mappings.
+ */
+static inline int put_page_unless_one(struct page *page)
+{
+	return atomic_add_unless(&page->_count, -1, 1);
+}
+
 extern int page_is_ram(unsigned long pfn);
 
 /* Support for virtually mapped pages */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6d53675c2b54..98ada58f9942 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -329,7 +329,9 @@ static inline void set_page_writeback(struct page *page)
  * System with lots of page flags available. This allows separate
  * flags for PageHead() and PageTail() checks of compound pages so that bit
  * tests can be used in performance sensitive paths. PageCompound is
- * generally not used in hot code paths.
+ * generally not used in hot code paths except arch/powerpc/mm/init_64.c
+ * and arch/powerpc/kvm/book3s_64_vio_hv.c which use it to detect huge pages
+ * and avoid handling those in real mode.
  */
 __PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head)
 __PAGEFLAG(Tail, tail)
-- 
cgit v1.2.3


From 8e0a1611cb891e72a9affc4a8ee4795c634896a6 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Wed, 28 Aug 2013 18:37:43 +1000
Subject: powerpc: add real mode support for dma operations on powernv

The existing TCE machine calls (tce_build and tce_free) only support
virtual mode as they call __raw_writeq for TCE invalidation, which
fails in real mode.

This introduces the real mode versions tce_build_rm and tce_free_rm,
which do mostly the same but use the "Store Doubleword Caching
Inhibited Indexed" (stdcix) instruction for TCE invalidation.

This new feature is going to be utilized by real mode support of VFIO.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/machdep.h        | 12 ++++++++
 arch/powerpc/platforms/powernv/pci-ioda.c | 49 +++++++++++++++++++++++--------
 arch/powerpc/platforms/powernv/pci.c      | 42 ++++++++++++++++++++++----
 arch/powerpc/platforms/powernv/pci.h      |  3 +-
 4 files changed, 87 insertions(+), 19 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 8b480901165a..07dd3b1312e5 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -78,6 +78,18 @@ struct machdep_calls {
 				    long index);
 	void		(*tce_flush)(struct iommu_table *tbl);
 
+	/* _rm versions are for real mode use only */
+	int		(*tce_build_rm)(struct iommu_table *tbl,
+				     long index,
+				     long npages,
+				     unsigned long uaddr,
+				     enum dma_data_direction direction,
+				     struct dma_attrs *attrs);
+	void		(*tce_free_rm)(struct iommu_table *tbl,
+				    long index,
+				    long npages);
+	void		(*tce_flush_rm)(struct iommu_table *tbl);
+
 	void __iomem *	(*ioremap)(phys_addr_t addr, unsigned long size,
 				   unsigned long flags, void *caller);
 	void		(*iounmap)(volatile void __iomem *token);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 74a5a5773b1f..307015d9cd99 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -70,6 +70,16 @@ define_pe_printk_level(pe_err, KERN_ERR);
 define_pe_printk_level(pe_warn, KERN_WARNING);
 define_pe_printk_level(pe_info, KERN_INFO);
 
+/*
+ * stdcix is only supposed to be used in hypervisor real mode as per
+ * the architecture spec
+ */
+static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
+{
+	__asm__ __volatile__("stdcix %0,0,%1"
+		: : "r" (val), "r" (paddr) : "memory");
+}
+
 static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
 {
 	unsigned long pe;
@@ -454,10 +464,13 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
 	}
 }
 
-static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
-					 u64 *startp, u64 *endp)
+static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
+					 struct iommu_table *tbl,
+					 u64 *startp, u64 *endp, bool rm)
 {
-	u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
+	u64 __iomem *invalidate = rm ?
+		(u64 __iomem *)pe->tce_inval_reg_phys :
+		(u64 __iomem *)tbl->it_index;
 	unsigned long start, end, inc;
 
 	start = __pa(startp);
@@ -484,7 +497,10 @@ static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
 
         mb(); /* Ensure above stores are visible */
         while (start <= end) {
-                __raw_writeq(start, invalidate);
+		if (rm)
+			__raw_rm_writeq(start, invalidate);
+		else
+			__raw_writeq(start, invalidate);
                 start += inc;
         }
 
@@ -496,10 +512,12 @@ static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
 
 static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
 					 struct iommu_table *tbl,
-					 u64 *startp, u64 *endp)
+					 u64 *startp, u64 *endp, bool rm)
 {
 	unsigned long start, end, inc;
-	u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
+	u64 __iomem *invalidate = rm ?
+		(u64 __iomem *)pe->tce_inval_reg_phys :
+		(u64 __iomem *)tbl->it_index;
 
 	/* We'll invalidate DMA address in PE scope */
 	start = 0x2ul << 60;
@@ -515,22 +533,25 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
 	mb();
 
 	while (start <= end) {
-		__raw_writeq(start, invalidate);
+		if (rm)
+			__raw_rm_writeq(start, invalidate);
+		else
+			__raw_writeq(start, invalidate);
 		start += inc;
 	}
 }
 
 void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
-				 u64 *startp, u64 *endp)
+				 u64 *startp, u64 *endp, bool rm)
 {
 	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
 					      tce32_table);
 	struct pnv_phb *phb = pe->phb;
 
 	if (phb->type == PNV_PHB_IODA1)
-		pnv_pci_ioda1_tce_invalidate(tbl, startp, endp);
+		pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
 	else
-		pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp);
+		pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
 }
 
 static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
@@ -603,7 +624,9 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 		 * bus number, print that out instead.
 		 */
 		tbl->it_busno = 0;
-		tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
+		pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
+		tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
+				8);
 		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE |
 			       TCE_PCI_SWINV_PAIR;
 	}
@@ -681,7 +704,9 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 		 * bus number, print that out instead.
 		 */
 		tbl->it_busno = 0;
-		tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
+		pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
+		tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
+				8);
 		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
 	}
 	iommu_init_table(tbl, phb->hose->node);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index a28d3b5e6393..420abe3baab9 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -401,7 +401,7 @@ struct pci_ops pnv_pci_ops = {
 
 static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
 			 unsigned long uaddr, enum dma_data_direction direction,
-			 struct dma_attrs *attrs)
+			 struct dma_attrs *attrs, bool rm)
 {
 	u64 proto_tce;
 	u64 *tcep, *tces;
@@ -423,12 +423,22 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
 	 * of flags if that becomes the case
 	 */
 	if (tbl->it_type & TCE_PCI_SWINV_CREATE)
-		pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1);
+		pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
 
 	return 0;
 }
 
-static void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
+static int pnv_tce_build_vm(struct iommu_table *tbl, long index, long npages,
+			    unsigned long uaddr,
+			    enum dma_data_direction direction,
+			    struct dma_attrs *attrs)
+{
+	return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs,
+			false);
+}
+
+static void pnv_tce_free(struct iommu_table *tbl, long index, long npages,
+		bool rm)
 {
 	u64 *tcep, *tces;
 
@@ -438,7 +448,12 @@ static void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
 		*(tcep++) = 0;
 
 	if (tbl->it_type & TCE_PCI_SWINV_FREE)
-		pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1);
+		pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
+}
+
+static void pnv_tce_free_vm(struct iommu_table *tbl, long index, long npages)
+{
+	pnv_tce_free(tbl, index, npages, false);
 }
 
 static unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
@@ -446,6 +461,19 @@ static unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
 	return ((u64 *)tbl->it_base)[index - tbl->it_offset];
 }
 
+static int pnv_tce_build_rm(struct iommu_table *tbl, long index, long npages,
+			    unsigned long uaddr,
+			    enum dma_data_direction direction,
+			    struct dma_attrs *attrs)
+{
+	return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, true);
+}
+
+static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages)
+{
+	pnv_tce_free(tbl, index, npages, true);
+}
+
 void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 			       void *tce_mem, u64 tce_size,
 			       u64 dma_offset)
@@ -610,8 +638,10 @@ void __init pnv_pci_init(void)
 
 	/* Configure IOMMU DMA hooks */
 	ppc_md.pci_dma_dev_setup = pnv_pci_dma_dev_setup;
-	ppc_md.tce_build = pnv_tce_build;
-	ppc_md.tce_free = pnv_tce_free;
+	ppc_md.tce_build = pnv_tce_build_vm;
+	ppc_md.tce_free = pnv_tce_free_vm;
+	ppc_md.tce_build_rm = pnv_tce_build_rm;
+	ppc_md.tce_free_rm = pnv_tce_free_rm;
 	ppc_md.tce_get = pnv_tce_get;
 	ppc_md.pci_probe_mode = pnv_pci_probe_mode;
 	set_pci_dma_ops(&dma_iommu_ops);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index d633c64e05a1..170dd98629d7 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -52,6 +52,7 @@ struct pnv_ioda_pe {
 	int			tce32_seg;
 	int			tce32_segcount;
 	struct iommu_table	tce32_table;
+	phys_addr_t		tce_inval_reg_phys;
 
 	/* XXX TODO: Add support for additional 64-bit iommus */
 
@@ -193,6 +194,6 @@ extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
 extern void pnv_pci_init_ioda_hub(struct device_node *np);
 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
 extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
-					u64 *startp, u64 *endp);
+					u64 *startp, u64 *endp, bool rm);
 
 #endif /* __POWERNV_PCI_H */
-- 
cgit v1.2.3


From de79f7b9f6f92ec1bd6f61fa1f20de60728a5b5e Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 10 Sep 2013 20:20:42 +1000
Subject: powerpc: Put FP/VSX and VR state into structures

This creates new 'thread_fp_state' and 'thread_vr_state' structures
to store FP/VSX state (including FPSCR) and Altivec/VSX state
(including VSCR), and uses them in the thread_struct.  In the
thread_fp_state, the FPRs and VSRs are represented as u64 rather
than double, since we rarely perform floating-point computations
on the values, and this will enable the structures to be used
in KVM code as well.  Similarly FPSCR is now a u64 rather than
a structure of two 32-bit values.

This takes the offsets out of the macros such as SAVE_32FPRS,
REST_32FPRS, etc.  This enables the same macros to be used for normal
and transactional state, enabling us to delete the transactional
versions of the macros.   This also removes the unused do_load_up_fpu
and do_load_up_altivec, which were in fact buggy since they didn't
create large enough stack frames to account for the fact that
load_up_fpu and load_up_altivec are not designed to be called from C
and assume that their caller's stack frame is an interrupt frame.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/ppc_asm.h     | 95 +++-------------------------------
 arch/powerpc/include/asm/processor.h   | 40 +++++++-------
 arch/powerpc/include/asm/sfp-machine.h |  2 +-
 arch/powerpc/kernel/align.c            |  6 +--
 arch/powerpc/kernel/asm-offsets.c      | 25 +++------
 arch/powerpc/kernel/fpu.S              | 59 +++++----------------
 arch/powerpc/kernel/process.c          |  8 ++-
 arch/powerpc/kernel/ptrace.c           | 49 +++++++++---------
 arch/powerpc/kernel/ptrace32.c         | 11 ++--
 arch/powerpc/kernel/signal_32.c        | 72 +++++++++++++-------------
 arch/powerpc/kernel/signal_64.c        | 29 ++++++-----
 arch/powerpc/kernel/tm.S               | 41 ++++++++-------
 arch/powerpc/kernel/traps.c            | 10 ++--
 arch/powerpc/kernel/vecemu.c           |  6 +--
 arch/powerpc/kernel/vector.S           | 50 ++++++------------
 arch/powerpc/kvm/book3s_pr.c           | 36 ++++++-------
 arch/powerpc/kvm/booke.c               | 19 ++++---
 17 files changed, 200 insertions(+), 358 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 599545738af3..140f67090f0b 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -98,123 +98,40 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 #define REST_8GPRS(n, base)	REST_4GPRS(n, base); REST_4GPRS(n+4, base)
 #define REST_10GPRS(n, base)	REST_8GPRS(n, base); REST_2GPRS(n+8, base)
 
-#define SAVE_FPR(n, base)	stfd	n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
+#define SAVE_FPR(n, base)	stfd	n,8*TS_FPRWIDTH*(n)(base)
 #define SAVE_2FPRS(n, base)	SAVE_FPR(n, base); SAVE_FPR(n+1, base)
 #define SAVE_4FPRS(n, base)	SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base)
 #define SAVE_8FPRS(n, base)	SAVE_4FPRS(n, base); SAVE_4FPRS(n+4, base)
 #define SAVE_16FPRS(n, base)	SAVE_8FPRS(n, base); SAVE_8FPRS(n+8, base)
 #define SAVE_32FPRS(n, base)	SAVE_16FPRS(n, base); SAVE_16FPRS(n+16, base)
-#define REST_FPR(n, base)	lfd	n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
+#define REST_FPR(n, base)	lfd	n,8*TS_FPRWIDTH*(n)(base)
 #define REST_2FPRS(n, base)	REST_FPR(n, base); REST_FPR(n+1, base)
 #define REST_4FPRS(n, base)	REST_2FPRS(n, base); REST_2FPRS(n+2, base)
 #define REST_8FPRS(n, base)	REST_4FPRS(n, base); REST_4FPRS(n+4, base)
 #define REST_16FPRS(n, base)	REST_8FPRS(n, base); REST_8FPRS(n+8, base)
 #define REST_32FPRS(n, base)	REST_16FPRS(n, base); REST_16FPRS(n+16, base)
 
-#define SAVE_VR(n,b,base)	li b,THREAD_VR0+(16*(n));  stvx n,base,b
+#define SAVE_VR(n,b,base)	li b,16*(n);  stvx n,base,b
 #define SAVE_2VRS(n,b,base)	SAVE_VR(n,b,base); SAVE_VR(n+1,b,base)
 #define SAVE_4VRS(n,b,base)	SAVE_2VRS(n,b,base); SAVE_2VRS(n+2,b,base)
 #define SAVE_8VRS(n,b,base)	SAVE_4VRS(n,b,base); SAVE_4VRS(n+4,b,base)
 #define SAVE_16VRS(n,b,base)	SAVE_8VRS(n,b,base); SAVE_8VRS(n+8,b,base)
 #define SAVE_32VRS(n,b,base)	SAVE_16VRS(n,b,base); SAVE_16VRS(n+16,b,base)
-#define REST_VR(n,b,base)	li b,THREAD_VR0+(16*(n)); lvx n,base,b
+#define REST_VR(n,b,base)	li b,16*(n); lvx n,base,b
 #define REST_2VRS(n,b,base)	REST_VR(n,b,base); REST_VR(n+1,b,base)
 #define REST_4VRS(n,b,base)	REST_2VRS(n,b,base); REST_2VRS(n+2,b,base)
 #define REST_8VRS(n,b,base)	REST_4VRS(n,b,base); REST_4VRS(n+4,b,base)
 #define REST_16VRS(n,b,base)	REST_8VRS(n,b,base); REST_8VRS(n+8,b,base)
 #define REST_32VRS(n,b,base)	REST_16VRS(n,b,base); REST_16VRS(n+16,b,base)
 
-/* Save/restore FPRs, VRs and VSRs from their checkpointed backups in
- * thread_struct:
- */
-#define SAVE_FPR_TRANSACT(n, base)	stfd n,THREAD_TRANSACT_FPR0+	\
-					8*TS_FPRWIDTH*(n)(base)
-#define SAVE_2FPRS_TRANSACT(n, base)	SAVE_FPR_TRANSACT(n, base);	\
-					SAVE_FPR_TRANSACT(n+1, base)
-#define SAVE_4FPRS_TRANSACT(n, base)	SAVE_2FPRS_TRANSACT(n, base);	\
-					SAVE_2FPRS_TRANSACT(n+2, base)
-#define SAVE_8FPRS_TRANSACT(n, base)	SAVE_4FPRS_TRANSACT(n, base);	\
-					SAVE_4FPRS_TRANSACT(n+4, base)
-#define SAVE_16FPRS_TRANSACT(n, base)	SAVE_8FPRS_TRANSACT(n, base);	\
-					SAVE_8FPRS_TRANSACT(n+8, base)
-#define SAVE_32FPRS_TRANSACT(n, base)	SAVE_16FPRS_TRANSACT(n, base);	\
-					SAVE_16FPRS_TRANSACT(n+16, base)
-
-#define REST_FPR_TRANSACT(n, base)	lfd	n,THREAD_TRANSACT_FPR0+	\
-					8*TS_FPRWIDTH*(n)(base)
-#define REST_2FPRS_TRANSACT(n, base)	REST_FPR_TRANSACT(n, base);	\
-					REST_FPR_TRANSACT(n+1, base)
-#define REST_4FPRS_TRANSACT(n, base)	REST_2FPRS_TRANSACT(n, base);	\
-					REST_2FPRS_TRANSACT(n+2, base)
-#define REST_8FPRS_TRANSACT(n, base)	REST_4FPRS_TRANSACT(n, base);	\
-					REST_4FPRS_TRANSACT(n+4, base)
-#define REST_16FPRS_TRANSACT(n, base)	REST_8FPRS_TRANSACT(n, base);	\
-					REST_8FPRS_TRANSACT(n+8, base)
-#define REST_32FPRS_TRANSACT(n, base)	REST_16FPRS_TRANSACT(n, base);	\
-					REST_16FPRS_TRANSACT(n+16, base)
-
-
-#define SAVE_VR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VR0+(16*(n)); \
-					stvx n,b,base
-#define SAVE_2VRS_TRANSACT(n,b,base)	SAVE_VR_TRANSACT(n,b,base);	\
-					SAVE_VR_TRANSACT(n+1,b,base)
-#define SAVE_4VRS_TRANSACT(n,b,base)	SAVE_2VRS_TRANSACT(n,b,base);	\
-					SAVE_2VRS_TRANSACT(n+2,b,base)
-#define SAVE_8VRS_TRANSACT(n,b,base)	SAVE_4VRS_TRANSACT(n,b,base);	\
-					SAVE_4VRS_TRANSACT(n+4,b,base)
-#define SAVE_16VRS_TRANSACT(n,b,base)	SAVE_8VRS_TRANSACT(n,b,base);	\
-					SAVE_8VRS_TRANSACT(n+8,b,base)
-#define SAVE_32VRS_TRANSACT(n,b,base)	SAVE_16VRS_TRANSACT(n,b,base);	\
-					SAVE_16VRS_TRANSACT(n+16,b,base)
-
-#define REST_VR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VR0+(16*(n)); \
-					lvx n,b,base
-#define REST_2VRS_TRANSACT(n,b,base)	REST_VR_TRANSACT(n,b,base);	\
-					REST_VR_TRANSACT(n+1,b,base)
-#define REST_4VRS_TRANSACT(n,b,base)	REST_2VRS_TRANSACT(n,b,base);	\
-					REST_2VRS_TRANSACT(n+2,b,base)
-#define REST_8VRS_TRANSACT(n,b,base)	REST_4VRS_TRANSACT(n,b,base);	\
-					REST_4VRS_TRANSACT(n+4,b,base)
-#define REST_16VRS_TRANSACT(n,b,base)	REST_8VRS_TRANSACT(n,b,base);	\
-					REST_8VRS_TRANSACT(n+8,b,base)
-#define REST_32VRS_TRANSACT(n,b,base)	REST_16VRS_TRANSACT(n,b,base);	\
-					REST_16VRS_TRANSACT(n+16,b,base)
-
-
-#define SAVE_VSR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VSR0+(16*(n)); \
-					STXVD2X(n,R##base,R##b)
-#define SAVE_2VSRS_TRANSACT(n,b,base)	SAVE_VSR_TRANSACT(n,b,base);	\
-	                                SAVE_VSR_TRANSACT(n+1,b,base)
-#define SAVE_4VSRS_TRANSACT(n,b,base)	SAVE_2VSRS_TRANSACT(n,b,base);	\
-	                                SAVE_2VSRS_TRANSACT(n+2,b,base)
-#define SAVE_8VSRS_TRANSACT(n,b,base)	SAVE_4VSRS_TRANSACT(n,b,base);	\
-	                                SAVE_4VSRS_TRANSACT(n+4,b,base)
-#define SAVE_16VSRS_TRANSACT(n,b,base)	SAVE_8VSRS_TRANSACT(n,b,base);	\
-	                                SAVE_8VSRS_TRANSACT(n+8,b,base)
-#define SAVE_32VSRS_TRANSACT(n,b,base)	SAVE_16VSRS_TRANSACT(n,b,base);	\
-	                                SAVE_16VSRS_TRANSACT(n+16,b,base)
-
-#define REST_VSR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VSR0+(16*(n)); \
-					LXVD2X(n,R##base,R##b)
-#define REST_2VSRS_TRANSACT(n,b,base)	REST_VSR_TRANSACT(n,b,base);    \
-	                                REST_VSR_TRANSACT(n+1,b,base)
-#define REST_4VSRS_TRANSACT(n,b,base)	REST_2VSRS_TRANSACT(n,b,base);	\
-	                                REST_2VSRS_TRANSACT(n+2,b,base)
-#define REST_8VSRS_TRANSACT(n,b,base)	REST_4VSRS_TRANSACT(n,b,base);	\
-	                                REST_4VSRS_TRANSACT(n+4,b,base)
-#define REST_16VSRS_TRANSACT(n,b,base)	REST_8VSRS_TRANSACT(n,b,base);	\
-	                                REST_8VSRS_TRANSACT(n+8,b,base)
-#define REST_32VSRS_TRANSACT(n,b,base)	REST_16VSRS_TRANSACT(n,b,base);	\
-	                                REST_16VSRS_TRANSACT(n+16,b,base)
-
 /* Save the lower 32 VSRs in the thread VSR region */
-#define SAVE_VSR(n,b,base)	li b,THREAD_VSR0+(16*(n));  STXVD2X(n,R##base,R##b)
+#define SAVE_VSR(n,b,base)	li b,16*(n);  STXVD2X(n,R##base,R##b)
 #define SAVE_2VSRS(n,b,base)	SAVE_VSR(n,b,base); SAVE_VSR(n+1,b,base)
 #define SAVE_4VSRS(n,b,base)	SAVE_2VSRS(n,b,base); SAVE_2VSRS(n+2,b,base)
 #define SAVE_8VSRS(n,b,base)	SAVE_4VSRS(n,b,base); SAVE_4VSRS(n+4,b,base)
 #define SAVE_16VSRS(n,b,base)	SAVE_8VSRS(n,b,base); SAVE_8VSRS(n+8,b,base)
 #define SAVE_32VSRS(n,b,base)	SAVE_16VSRS(n,b,base); SAVE_16VSRS(n+16,b,base)
-#define REST_VSR(n,b,base)	li b,THREAD_VSR0+(16*(n)); LXVD2X(n,R##base,R##b)
+#define REST_VSR(n,b,base)	li b,16*(n); LXVD2X(n,R##base,R##b)
 #define REST_2VSRS(n,b,base)	REST_VSR(n,b,base); REST_VSR(n+1,b,base)
 #define REST_4VSRS(n,b,base)	REST_2VSRS(n,b,base); REST_2VSRS(n+2,b,base)
 #define REST_8VSRS(n,b,base)	REST_4VSRS(n,b,base); REST_4VSRS(n+4,b,base)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index ce4de5aed7b5..afe695e9feb8 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -144,8 +144,20 @@ typedef struct {
 
 #define TS_FPROFFSET 0
 #define TS_VSRLOWOFFSET 1
-#define TS_FPR(i) fpr[i][TS_FPROFFSET]
-#define TS_TRANS_FPR(i) transact_fpr[i][TS_FPROFFSET]
+#define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET]
+#define TS_TRANS_FPR(i) transact_fp.fpr[i][TS_FPROFFSET]
+
+/* FP and VSX 0-31 register set */
+struct thread_fp_state {
+	u64	fpr[32][TS_FPRWIDTH] __attribute__((aligned(16)));
+	u64	fpscr;		/* Floating point status */
+};
+
+/* Complete AltiVec register set including VSCR */
+struct thread_vr_state {
+	vector128	vr[32] __attribute__((aligned(16)));
+	vector128	vscr __attribute__((aligned(16)));
+};
 
 struct thread_struct {
 	unsigned long	ksp;		/* Kernel stack pointer */
@@ -198,13 +210,7 @@ struct thread_struct {
 	unsigned long	dvc2;
 #endif
 #endif
-	/* FP and VSX 0-31 register set */
-	double		fpr[32][TS_FPRWIDTH] __attribute__((aligned(16)));
-	struct {
-
-		unsigned int pad;
-		unsigned int val;	/* Floating point status */
-	} fpscr;
+	struct thread_fp_state	fp_state;
 	int		fpexc_mode;	/* floating-point exception mode */
 	unsigned int	align_ctl;	/* alignment handling control */
 #ifdef CONFIG_PPC64
@@ -222,10 +228,7 @@ struct thread_struct {
 	struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
 	unsigned long	trap_nr;	/* last trap # on this thread */
 #ifdef CONFIG_ALTIVEC
-	/* Complete AltiVec register set */
-	vector128	vr[32] __attribute__((aligned(16)));
-	/* AltiVec status */
-	vector128	vscr __attribute__((aligned(16)));
+	struct thread_vr_state vr_state;
 	unsigned long	vrsave;
 	int		used_vr;	/* set if process has used altivec */
 #endif /* CONFIG_ALTIVEC */
@@ -262,13 +265,8 @@ struct thread_struct {
 	 * transact_fpr[] is the new set of transactional values.
 	 * VRs work the same way.
 	 */
-	double		transact_fpr[32][TS_FPRWIDTH];
-	struct {
-		unsigned int pad;
-		unsigned int val;	/* Floating point status */
-	} transact_fpscr;
-	vector128	transact_vr[32] __attribute__((aligned(16)));
-	vector128	transact_vscr __attribute__((aligned(16)));
+	struct thread_fp_state transact_fp;
+	struct thread_vr_state transact_vr;
 	unsigned long	transact_vrsave;
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
@@ -322,8 +320,6 @@ struct thread_struct {
 	.ksp = INIT_SP, \
 	.regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
 	.fs = KERNEL_DS, \
-	.fpr = {{0}}, \
-	.fpscr = { .val = 0, }, \
 	.fpexc_mode = 0, \
 	.ppr = INIT_PPR, \
 }
diff --git a/arch/powerpc/include/asm/sfp-machine.h b/arch/powerpc/include/asm/sfp-machine.h
index 3a7a67a0d006..d89beaba26ff 100644
--- a/arch/powerpc/include/asm/sfp-machine.h
+++ b/arch/powerpc/include/asm/sfp-machine.h
@@ -125,7 +125,7 @@
 #define FP_EX_DIVZERO         (1 << (31 - 5))
 #define FP_EX_INEXACT         (1 << (31 - 6))
 
-#define __FPU_FPSCR	(current->thread.fpscr.val)
+#define __FPU_FPSCR	(current->thread.fp_state.fpscr)
 
 /* We only actually write to the destination register
  * if exceptions signalled (if any) will not trap.
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index a27ccd5dc6b9..eaa16bc17e9d 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -660,7 +660,7 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
 	if (reg < 32)
 		ptr = (char *) &current->thread.TS_FPR(reg);
 	else
-		ptr = (char *) &current->thread.vr[reg - 32];
+		ptr = (char *) &current->thread.vr_state.vr[reg - 32];
 
 	lptr = (unsigned long *) ptr;
 
@@ -897,7 +897,7 @@ int fix_alignment(struct pt_regs *regs)
 				return -EFAULT;
 		}
 	} else if (flags & F) {
-		data.dd = current->thread.TS_FPR(reg);
+		data.ll = current->thread.TS_FPR(reg);
 		if (flags & S) {
 			/* Single-precision FP store requires conversion... */
 #ifdef CONFIG_PPC_FPU
@@ -975,7 +975,7 @@ int fix_alignment(struct pt_regs *regs)
 		if (unlikely(ret))
 			return -EFAULT;
 	} else if (flags & F)
-		current->thread.TS_FPR(reg) = data.dd;
+		current->thread.TS_FPR(reg) = data.ll;
 	else
 		regs->gpr[reg] = data.ll;
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 502c7a4e73f7..8d27b61c95b9 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -90,16 +90,15 @@ int main(void)
 	DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0]));
 #endif
 	DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode));
-	DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0]));
-	DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr));
+	DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state));
+	DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr));
 #ifdef CONFIG_ALTIVEC
-	DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0]));
+	DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state));
 	DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
-	DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr));
 	DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
+	DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr));
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_VSX
-	DEFINE(THREAD_VSR0, offsetof(struct thread_struct, fpr));
 	DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr));
 #endif /* CONFIG_VSX */
 #ifdef CONFIG_PPC64
@@ -143,20 +142,12 @@ int main(void)
 	DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr));
 	DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr));
 	DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs));
-	DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct,
-					 transact_vr[0]));
-	DEFINE(THREAD_TRANSACT_VSCR, offsetof(struct thread_struct,
-					  transact_vscr));
+	DEFINE(THREAD_TRANSACT_VRSTATE, offsetof(struct thread_struct,
+						 transact_vr));
 	DEFINE(THREAD_TRANSACT_VRSAVE, offsetof(struct thread_struct,
 					    transact_vrsave));
-	DEFINE(THREAD_TRANSACT_FPR0, offsetof(struct thread_struct,
-					  transact_fpr[0]));
-	DEFINE(THREAD_TRANSACT_FPSCR, offsetof(struct thread_struct,
-					   transact_fpscr));
-#ifdef CONFIG_VSX
-	DEFINE(THREAD_TRANSACT_VSR0, offsetof(struct thread_struct,
-					  transact_fpr[0]));
-#endif
+	DEFINE(THREAD_TRANSACT_FPSTATE, offsetof(struct thread_struct,
+						 transact_fp));
 	/* Local pt_regs on stack for Transactional Memory funcs. */
 	DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD +
 	       sizeof(struct pt_regs) + 16);
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index caeaabf11a2f..34b96e6d2f0d 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -35,15 +35,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
 2:	REST_32VSRS(n,c,base);						\
 3:
 
-#define __REST_32FPVSRS_TRANSACT(n,c,base)				\
-BEGIN_FTR_SECTION							\
-	b	2f;							\
-END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
-	REST_32FPRS_TRANSACT(n,base);					\
-	b	3f;							\
-2:	REST_32VSRS_TRANSACT(n,c,base);					\
-3:
-
 #define __SAVE_32FPVSRS(n,c,base)					\
 BEGIN_FTR_SECTION							\
 	b	2f;							\
@@ -54,40 +45,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
 3:
 #else
 #define __REST_32FPVSRS(n,b,base)	REST_32FPRS(n, base)
-#define __REST_32FPVSRS_TRANSACT(n,b,base)	REST_32FPRS(n, base)
 #define __SAVE_32FPVSRS(n,b,base)	SAVE_32FPRS(n, base)
 #endif
 #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
-#define REST_32FPVSRS_TRANSACT(n,c,base) \
-	__REST_32FPVSRS_TRANSACT(n,__REG_##c,__REG_##base)
 #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Wrapper to call load_up_fpu from C.
- * void do_load_up_fpu(struct pt_regs *regs);
- */
-_GLOBAL(do_load_up_fpu)
-	mflr	r0
-	std	r0, 16(r1)
-	stdu	r1, -112(r1)
-
-	subi	r6, r3, STACK_FRAME_OVERHEAD
-	/* load_up_fpu expects r12=MSR, r13=PACA, and returns
-	 * with r12 = new MSR.
-	 */
-	ld	r12,_MSR(r6)
-	GET_PACA(r13)
-
-	bl	load_up_fpu
-	std	r12,_MSR(r6)
-
-	ld	r0, 112+16(r1)
-	addi	r1, r1, 112
-	mtlr	r0
-	blr
-
-
 /* void do_load_up_transact_fpu(struct thread_struct *thread)
  *
  * This is similar to load_up_fpu but for the transactional version of the FP
@@ -105,9 +68,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	SYNC
 	MTMSRD(r5)
 
-	lfd	fr0,THREAD_TRANSACT_FPSCR(r3)
+	addi	r7,r3,THREAD_TRANSACT_FPSTATE
+	lfd	fr0,FPSTATE_FPSCR(r7)
 	MTFSF_L(fr0)
-	REST_32FPVSRS_TRANSACT(0, R4, R3)
+	REST_32FPVSRS(0, R4, R7)
 
 	/* FP/VSX off again */
 	MTMSRD(r6)
@@ -147,9 +111,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	beq	1f
 	toreal(r4)
 	addi	r4,r4,THREAD		/* want last_task_used_math->thread */
-	SAVE_32FPVSRS(0, R5, R4)
+	addi	r8,r4,THREAD_FPSTATE
+	SAVE_32FPVSRS(0, R5, R8)
 	mffs	fr0
-	stfd	fr0,THREAD_FPSCR(r4)
+	stfd	fr0,FPSTATE_FPSCR(r8)
 	PPC_LL	r5,PT_REGS(r4)
 	toreal(r5)
 	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
@@ -160,7 +125,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif /* CONFIG_SMP */
 	/* enable use of FP after return */
 #ifdef CONFIG_PPC32
-	mfspr	r5,SPRN_SPRG_THREAD		/* current task's THREAD (phys) */
+	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
 	lwz	r4,THREAD_FPEXC_MODE(r5)
 	ori	r9,r9,MSR_FP		/* enable FP for current */
 	or	r9,r9,r4
@@ -172,9 +137,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	or	r12,r12,r4
 	std	r12,_MSR(r1)
 #endif
-	lfd	fr0,THREAD_FPSCR(r5)
+	addi	r7,r5,THREAD_FPSTATE
+	lfd	fr0,FPSTATE_FPSCR(r7)
 	MTFSF_L(fr0)
-	REST_32FPVSRS(0, R4, R5)
+	REST_32FPVSRS(0, R4, R7)
 #ifndef CONFIG_SMP
 	subi	r4,r5,THREAD
 	fromreal(r4)
@@ -208,9 +174,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	addi	r3,r3,THREAD	        /* want THREAD of task */
 	PPC_LL	r5,PT_REGS(r3)
 	PPC_LCMPI	0,r5,0
-	SAVE_32FPVSRS(0, R4 ,R3)
+	addi	r6,r3,THREAD_FPSTATE
+	SAVE_32FPVSRS(0, R4, R6)
 	mffs	fr0
-	stfd	fr0,THREAD_FPSCR(r3)
+	stfd	fr0,FPSTATE_FPSCR(r6)
 	beq	1f
 	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
 	li	r3,MSR_FP|MSR_FE0|MSR_FE1
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 96d2fdf3aa9e..7a281416affb 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1113,12 +1113,10 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 #ifdef CONFIG_VSX
 	current->thread.used_vsr = 0;
 #endif
-	memset(current->thread.fpr, 0, sizeof(current->thread.fpr));
-	current->thread.fpscr.val = 0;
+	memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
 #ifdef CONFIG_ALTIVEC
-	memset(current->thread.vr, 0, sizeof(current->thread.vr));
-	memset(&current->thread.vscr, 0, sizeof(current->thread.vscr));
-	current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */
+	memset(&current->thread.vr_state, 0, sizeof(current->thread.vr_state));
+	current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */
 	current->thread.vrsave = 0;
 	current->thread.used_vr = 0;
 #endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 9a0d24c390a3..238580043d85 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -362,7 +362,7 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,
 		   void *kbuf, void __user *ubuf)
 {
 #ifdef CONFIG_VSX
-	double buf[33];
+	u64 buf[33];
 	int i;
 #endif
 	flush_fp_to_thread(target);
@@ -371,15 +371,15 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,
 	/* copy to local buffer then write that out */
 	for (i = 0; i < 32 ; i++)
 		buf[i] = target->thread.TS_FPR(i);
-	memcpy(&buf[32], &target->thread.fpscr, sizeof(double));
+	buf[32] = target->thread.fp_state.fpscr;
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
 
 #else
-	BUILD_BUG_ON(offsetof(struct thread_struct, fpscr) !=
-		     offsetof(struct thread_struct, TS_FPR(32)));
+	BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
+		     offsetof(struct thread_fp_state, fpr[32][0]));
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   &target->thread.fpr, 0, -1);
+				   &target->thread.fp_state, 0, -1);
 #endif
 }
 
@@ -388,7 +388,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
 		   const void *kbuf, const void __user *ubuf)
 {
 #ifdef CONFIG_VSX
-	double buf[33];
+	u64 buf[33];
 	int i;
 #endif
 	flush_fp_to_thread(target);
@@ -400,14 +400,14 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
 		return i;
 	for (i = 0; i < 32 ; i++)
 		target->thread.TS_FPR(i) = buf[i];
-	memcpy(&target->thread.fpscr, &buf[32], sizeof(double));
+	target->thread.fp_state.fpscr = buf[32];
 	return 0;
 #else
-	BUILD_BUG_ON(offsetof(struct thread_struct, fpscr) !=
-		     offsetof(struct thread_struct, TS_FPR(32)));
+	BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
+		     offsetof(struct thread_fp_state, fpr[32][0]));
 
 	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				  &target->thread.fpr, 0, -1);
+				  &target->thread.fp_state, 0, -1);
 #endif
 }
 
@@ -440,11 +440,11 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset,
 
 	flush_altivec_to_thread(target);
 
-	BUILD_BUG_ON(offsetof(struct thread_struct, vscr) !=
-		     offsetof(struct thread_struct, vr[32]));
+	BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
+		     offsetof(struct thread_vr_state, vr[32]));
 
 	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				  &target->thread.vr, 0,
+				  &target->thread.vr_state, 0,
 				  33 * sizeof(vector128));
 	if (!ret) {
 		/*
@@ -471,11 +471,12 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset,
 
 	flush_altivec_to_thread(target);
 
-	BUILD_BUG_ON(offsetof(struct thread_struct, vscr) !=
-		     offsetof(struct thread_struct, vr[32]));
+	BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
+		     offsetof(struct thread_vr_state, vr[32]));
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				 &target->thread.vr, 0, 33 * sizeof(vector128));
+				 &target->thread.vr_state, 0,
+				 33 * sizeof(vector128));
 	if (!ret && count > 0) {
 		/*
 		 * We use only the first word of vrsave.
@@ -514,13 +515,13 @@ static int vsr_get(struct task_struct *target, const struct user_regset *regset,
 		   unsigned int pos, unsigned int count,
 		   void *kbuf, void __user *ubuf)
 {
-	double buf[32];
+	u64 buf[32];
 	int ret, i;
 
 	flush_vsx_to_thread(target);
 
 	for (i = 0; i < 32 ; i++)
-		buf[i] = target->thread.fpr[i][TS_VSRLOWOFFSET];
+		buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
 	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
 				  buf, 0, 32 * sizeof(double));
 
@@ -531,7 +532,7 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset,
 		   unsigned int pos, unsigned int count,
 		   const void *kbuf, const void __user *ubuf)
 {
-	double buf[32];
+	u64 buf[32];
 	int ret,i;
 
 	flush_vsx_to_thread(target);
@@ -539,7 +540,7 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset,
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 				 buf, 0, 32 * sizeof(double));
 	for (i = 0; i < 32 ; i++)
-		target->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+		target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
 
 
 	return ret;
@@ -1554,10 +1555,10 @@ long arch_ptrace(struct task_struct *child, long request,
 
 			flush_fp_to_thread(child);
 			if (fpidx < (PT_FPSCR - PT_FPR0))
-				tmp = ((unsigned long *)child->thread.fpr)
+				tmp = ((unsigned long *)child->thread.fp_state.fpr)
 					[fpidx * TS_FPRWIDTH];
 			else
-				tmp = child->thread.fpscr.val;
+				tmp = child->thread.fp_state.fpscr;
 		}
 		ret = put_user(tmp, datalp);
 		break;
@@ -1587,10 +1588,10 @@ long arch_ptrace(struct task_struct *child, long request,
 
 			flush_fp_to_thread(child);
 			if (fpidx < (PT_FPSCR - PT_FPR0))
-				((unsigned long *)child->thread.fpr)
+				((unsigned long *)child->thread.fp_state.fpr)
 					[fpidx * TS_FPRWIDTH] = data;
 			else
-				child->thread.fpscr.val = data;
+				child->thread.fp_state.fpscr = data;
 			ret = 0;
 		}
 		break;
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
index f51599e941c7..097f8dc426a0 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -43,7 +43,6 @@
 #define FPRNUMBER(i) (((i) - PT_FPR0) >> 1)
 #define FPRHALF(i) (((i) - PT_FPR0) & 1)
 #define FPRINDEX(i) TS_FPRWIDTH * FPRNUMBER(i) * 2 + FPRHALF(i)
-#define FPRINDEX_3264(i) (TS_FPRWIDTH * ((i) - PT_FPR0))
 
 long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			compat_ulong_t caddr, compat_ulong_t cdata)
@@ -105,7 +104,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			 * to be an array of unsigned int (32 bits) - the
 			 * index passed in is based on this assumption.
 			 */
-			tmp = ((unsigned int *)child->thread.fpr)
+			tmp = ((unsigned int *)child->thread.fp_state.fpr)
 				[FPRINDEX(index)];
 		}
 		ret = put_user((unsigned int)tmp, (u32 __user *)data);
@@ -147,8 +146,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 		if (numReg >= PT_FPR0) {
 			flush_fp_to_thread(child);
 			/* get 64 bit FPR */
-			tmp = ((u64 *)child->thread.fpr)
-				[FPRINDEX_3264(numReg)];
+			tmp = child->thread.fp_state.fpr[numReg - PT_FPR0][0];
 		} else { /* register within PT_REGS struct */
 			unsigned long tmp2;
 			ret = ptrace_get_reg(child, numReg, &tmp2);
@@ -207,7 +205,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			 * to be an array of unsigned int (32 bits) - the
 			 * index passed in is based on this assumption.
 			 */
-			((unsigned int *)child->thread.fpr)
+			((unsigned int *)child->thread.fp_state.fpr)
 				[FPRINDEX(index)] = data;
 			ret = 0;
 		}
@@ -251,8 +249,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			u64 *tmp;
 			flush_fp_to_thread(child);
 			/* get 64 bit FPR ... */
-			tmp = &(((u64 *)child->thread.fpr)
-				[FPRINDEX_3264(numReg)]);
+			tmp = &child->thread.fp_state.fpr[numReg - PT_FPR0][0];
 			/* ... write the 32 bit part we want */
 			((u32 *)tmp)[index % 2] = data;
 			ret = 0;
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index bebdf1a1a540..ea25e45ea959 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -265,27 +265,27 @@ struct rt_sigframe {
 unsigned long copy_fpr_to_user(void __user *to,
 			       struct task_struct *task)
 {
-	double buf[ELF_NFPREG];
+	u64 buf[ELF_NFPREG];
 	int i;
 
 	/* save FPR copy to local buffer then write to the thread_struct */
 	for (i = 0; i < (ELF_NFPREG - 1) ; i++)
 		buf[i] = task->thread.TS_FPR(i);
-	memcpy(&buf[i], &task->thread.fpscr, sizeof(double));
+	buf[i] = task->thread.fp_state.fpscr;
 	return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
 }
 
 unsigned long copy_fpr_from_user(struct task_struct *task,
 				 void __user *from)
 {
-	double buf[ELF_NFPREG];
+	u64 buf[ELF_NFPREG];
 	int i;
 
 	if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
 		return 1;
 	for (i = 0; i < (ELF_NFPREG - 1) ; i++)
 		task->thread.TS_FPR(i) = buf[i];
-	memcpy(&task->thread.fpscr, &buf[i], sizeof(double));
+	task->thread.fp_state.fpscr = buf[i];
 
 	return 0;
 }
@@ -293,25 +293,25 @@ unsigned long copy_fpr_from_user(struct task_struct *task,
 unsigned long copy_vsx_to_user(void __user *to,
 			       struct task_struct *task)
 {
-	double buf[ELF_NVSRHALFREG];
+	u64 buf[ELF_NVSRHALFREG];
 	int i;
 
 	/* save FPR copy to local buffer then write to the thread_struct */
 	for (i = 0; i < ELF_NVSRHALFREG; i++)
-		buf[i] = task->thread.fpr[i][TS_VSRLOWOFFSET];
+		buf[i] = task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
 	return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
 }
 
 unsigned long copy_vsx_from_user(struct task_struct *task,
 				 void __user *from)
 {
-	double buf[ELF_NVSRHALFREG];
+	u64 buf[ELF_NVSRHALFREG];
 	int i;
 
 	if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
 		return 1;
 	for (i = 0; i < ELF_NVSRHALFREG ; i++)
-		task->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+		task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
 	return 0;
 }
 
@@ -319,27 +319,27 @@ unsigned long copy_vsx_from_user(struct task_struct *task,
 unsigned long copy_transact_fpr_to_user(void __user *to,
 				  struct task_struct *task)
 {
-	double buf[ELF_NFPREG];
+	u64 buf[ELF_NFPREG];
 	int i;
 
 	/* save FPR copy to local buffer then write to the thread_struct */
 	for (i = 0; i < (ELF_NFPREG - 1) ; i++)
 		buf[i] = task->thread.TS_TRANS_FPR(i);
-	memcpy(&buf[i], &task->thread.transact_fpscr, sizeof(double));
+	buf[i] = task->thread.transact_fp.fpscr;
 	return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
 }
 
 unsigned long copy_transact_fpr_from_user(struct task_struct *task,
 					  void __user *from)
 {
-	double buf[ELF_NFPREG];
+	u64 buf[ELF_NFPREG];
 	int i;
 
 	if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
 		return 1;
 	for (i = 0; i < (ELF_NFPREG - 1) ; i++)
 		task->thread.TS_TRANS_FPR(i) = buf[i];
-	memcpy(&task->thread.transact_fpscr, &buf[i], sizeof(double));
+	task->thread.transact_fp.fpscr = buf[i];
 
 	return 0;
 }
@@ -347,25 +347,25 @@ unsigned long copy_transact_fpr_from_user(struct task_struct *task,
 unsigned long copy_transact_vsx_to_user(void __user *to,
 				  struct task_struct *task)
 {
-	double buf[ELF_NVSRHALFREG];
+	u64 buf[ELF_NVSRHALFREG];
 	int i;
 
 	/* save FPR copy to local buffer then write to the thread_struct */
 	for (i = 0; i < ELF_NVSRHALFREG; i++)
-		buf[i] = task->thread.transact_fpr[i][TS_VSRLOWOFFSET];
+		buf[i] = task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET];
 	return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
 }
 
 unsigned long copy_transact_vsx_from_user(struct task_struct *task,
 					  void __user *from)
 {
-	double buf[ELF_NVSRHALFREG];
+	u64 buf[ELF_NVSRHALFREG];
 	int i;
 
 	if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
 		return 1;
 	for (i = 0; i < ELF_NVSRHALFREG ; i++)
-		task->thread.transact_fpr[i][TS_VSRLOWOFFSET] = buf[i];
+		task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = buf[i];
 	return 0;
 }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
@@ -373,14 +373,14 @@ unsigned long copy_transact_vsx_from_user(struct task_struct *task,
 inline unsigned long copy_fpr_to_user(void __user *to,
 				      struct task_struct *task)
 {
-	return __copy_to_user(to, task->thread.fpr,
+	return __copy_to_user(to, task->thread.fp_state.fpr,
 			      ELF_NFPREG * sizeof(double));
 }
 
 inline unsigned long copy_fpr_from_user(struct task_struct *task,
 					void __user *from)
 {
-	return __copy_from_user(task->thread.fpr, from,
+	return __copy_from_user(task->thread.fp_state.fpr, from,
 			      ELF_NFPREG * sizeof(double));
 }
 
@@ -388,14 +388,14 @@ inline unsigned long copy_fpr_from_user(struct task_struct *task,
 inline unsigned long copy_transact_fpr_to_user(void __user *to,
 					 struct task_struct *task)
 {
-	return __copy_to_user(to, task->thread.transact_fpr,
+	return __copy_to_user(to, task->thread.transact_fp.fpr,
 			      ELF_NFPREG * sizeof(double));
 }
 
 inline unsigned long copy_transact_fpr_from_user(struct task_struct *task,
 						 void __user *from)
 {
-	return __copy_from_user(task->thread.transact_fpr, from,
+	return __copy_from_user(task->thread.transact_fp.fpr, from,
 				ELF_NFPREG * sizeof(double));
 }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
@@ -423,7 +423,7 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
 	/* save altivec registers */
 	if (current->thread.used_vr) {
 		flush_altivec_to_thread(current);
-		if (__copy_to_user(&frame->mc_vregs, current->thread.vr,
+		if (__copy_to_user(&frame->mc_vregs, &current->thread.vr_state,
 				   ELF_NVRREG * sizeof(vector128)))
 			return 1;
 		/* set MSR_VEC in the saved MSR value to indicate that
@@ -534,17 +534,17 @@ static int save_tm_user_regs(struct pt_regs *regs,
 	/* save altivec registers */
 	if (current->thread.used_vr) {
 		flush_altivec_to_thread(current);
-		if (__copy_to_user(&frame->mc_vregs, current->thread.vr,
+		if (__copy_to_user(&frame->mc_vregs, &current->thread.vr_state,
 				   ELF_NVRREG * sizeof(vector128)))
 			return 1;
 		if (msr & MSR_VEC) {
 			if (__copy_to_user(&tm_frame->mc_vregs,
-					   current->thread.transact_vr,
+					   &current->thread.transact_vr,
 					   ELF_NVRREG * sizeof(vector128)))
 				return 1;
 		} else {
 			if (__copy_to_user(&tm_frame->mc_vregs,
-					   current->thread.vr,
+					   &current->thread.vr_state,
 					   ELF_NVRREG * sizeof(vector128)))
 				return 1;
 		}
@@ -692,11 +692,12 @@ static long restore_user_regs(struct pt_regs *regs,
 	regs->msr &= ~MSR_VEC;
 	if (msr & MSR_VEC) {
 		/* restore altivec registers from the stack */
-		if (__copy_from_user(current->thread.vr, &sr->mc_vregs,
+		if (__copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
 				     sizeof(sr->mc_vregs)))
 			return 1;
 	} else if (current->thread.used_vr)
-		memset(current->thread.vr, 0, ELF_NVRREG * sizeof(vector128));
+		memset(&current->thread.vr_state, 0,
+		       ELF_NVRREG * sizeof(vector128));
 
 	/* Always get VRSAVE back */
 	if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32]))
@@ -722,7 +723,7 @@ static long restore_user_regs(struct pt_regs *regs,
 			return 1;
 	} else if (current->thread.used_vsr)
 		for (i = 0; i < 32 ; i++)
-			current->thread.fpr[i][TS_VSRLOWOFFSET] = 0;
+			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
 #endif /* CONFIG_VSX */
 	/*
 	 * force the process to reload the FP registers from
@@ -798,15 +799,16 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 	regs->msr &= ~MSR_VEC;
 	if (msr & MSR_VEC) {
 		/* restore altivec registers from the stack */
-		if (__copy_from_user(current->thread.vr, &sr->mc_vregs,
+		if (__copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
 				     sizeof(sr->mc_vregs)) ||
-		    __copy_from_user(current->thread.transact_vr,
+		    __copy_from_user(&current->thread.transact_vr,
 				     &tm_sr->mc_vregs,
 				     sizeof(sr->mc_vregs)))
 			return 1;
 	} else if (current->thread.used_vr) {
-		memset(current->thread.vr, 0, ELF_NVRREG * sizeof(vector128));
-		memset(current->thread.transact_vr, 0,
+		memset(&current->thread.vr_state, 0,
+		       ELF_NVRREG * sizeof(vector128));
+		memset(&current->thread.transact_vr, 0,
 		       ELF_NVRREG * sizeof(vector128));
 	}
 
@@ -838,8 +840,8 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 			return 1;
 	} else if (current->thread.used_vsr)
 		for (i = 0; i < 32 ; i++) {
-			current->thread.fpr[i][TS_VSRLOWOFFSET] = 0;
-			current->thread.transact_fpr[i][TS_VSRLOWOFFSET] = 0;
+			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+			current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0;
 		}
 #endif /* CONFIG_VSX */
 
@@ -1030,7 +1032,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
 		if (__put_user(0, &rt_sf->uc.uc_link))
 			goto badframe;
 
-	current->thread.fpscr.val = 0;	/* turn off all fp exceptions */
+	current->thread.fp_state.fpscr = 0;	/* turn off all fp exceptions */
 
 	/* create a stack frame for the caller of the handler */
 	newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16);
@@ -1462,7 +1464,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
 
 	regs->link = tramp;
 
-	current->thread.fpscr.val = 0;	/* turn off all fp exceptions */
+	current->thread.fp_state.fpscr = 0;	/* turn off all fp exceptions */
 
 	/* create a stack frame for the caller of the handler */
 	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index f93ec2835a13..a3c1ed4b979c 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -103,7 +103,8 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
 	if (current->thread.used_vr) {
 		flush_altivec_to_thread(current);
 		/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
-		err |= __copy_to_user(v_regs, current->thread.vr, 33 * sizeof(vector128));
+		err |= __copy_to_user(v_regs, &current->thread.vr_state,
+				      33 * sizeof(vector128));
 		/* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg)
 		 * contains valid data.
 		 */
@@ -195,18 +196,18 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
 	if (current->thread.used_vr) {
 		flush_altivec_to_thread(current);
 		/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
-		err |= __copy_to_user(v_regs, current->thread.vr,
+		err |= __copy_to_user(v_regs, &current->thread.vr_state,
 				      33 * sizeof(vector128));
 		/* If VEC was enabled there are transactional VRs valid too,
 		 * else they're a copy of the checkpointed VRs.
 		 */
 		if (msr & MSR_VEC)
 			err |= __copy_to_user(tm_v_regs,
-					      current->thread.transact_vr,
+					      &current->thread.transact_vr,
 					      33 * sizeof(vector128));
 		else
 			err |= __copy_to_user(tm_v_regs,
-					      current->thread.vr,
+					      &current->thread.vr_state,
 					      33 * sizeof(vector128));
 
 		/* set MSR_VEC in the MSR value in the frame to indicate
@@ -349,10 +350,10 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
 		return -EFAULT;
 	/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
 	if (v_regs != NULL && (msr & MSR_VEC) != 0)
-		err |= __copy_from_user(current->thread.vr, v_regs,
+		err |= __copy_from_user(&current->thread.vr_state, v_regs,
 					33 * sizeof(vector128));
 	else if (current->thread.used_vr)
-		memset(current->thread.vr, 0, 33 * sizeof(vector128));
+		memset(&current->thread.vr_state, 0, 33 * sizeof(vector128));
 	/* Always get VRSAVE back */
 	if (v_regs != NULL)
 		err |= __get_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
@@ -374,7 +375,7 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
 		err |= copy_vsx_from_user(current, v_regs);
 	else
 		for (i = 0; i < 32 ; i++)
-			current->thread.fpr[i][TS_VSRLOWOFFSET] = 0;
+			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
 #endif
 	return err;
 }
@@ -468,14 +469,14 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
 		return -EFAULT;
 	/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
 	if (v_regs != NULL && tm_v_regs != NULL && (msr & MSR_VEC) != 0) {
-		err |= __copy_from_user(current->thread.vr, v_regs,
+		err |= __copy_from_user(&current->thread.vr_state, v_regs,
 					33 * sizeof(vector128));
-		err |= __copy_from_user(current->thread.transact_vr, tm_v_regs,
+		err |= __copy_from_user(&current->thread.transact_vr, tm_v_regs,
 					33 * sizeof(vector128));
 	}
 	else if (current->thread.used_vr) {
-		memset(current->thread.vr, 0, 33 * sizeof(vector128));
-		memset(current->thread.transact_vr, 0, 33 * sizeof(vector128));
+		memset(&current->thread.vr_state, 0, 33 * sizeof(vector128));
+		memset(&current->thread.transact_vr, 0, 33 * sizeof(vector128));
 	}
 	/* Always get VRSAVE back */
 	if (v_regs != NULL && tm_v_regs != NULL) {
@@ -507,8 +508,8 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
 		err |= copy_transact_vsx_from_user(current, tm_v_regs);
 	} else {
 		for (i = 0; i < 32 ; i++) {
-			current->thread.fpr[i][TS_VSRLOWOFFSET] = 0;
-			current->thread.transact_fpr[i][TS_VSRLOWOFFSET] = 0;
+			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+			current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0;
 		}
 	}
 #endif
@@ -747,7 +748,7 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
 		goto badframe;
 
 	/* Make sure signal handler doesn't get spurious FP exceptions */
-	current->thread.fpscr.val = 0;
+	current->thread.fp_state.fpscr = 0;
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	/* Remove TM bits from thread's MSR.  The MSR in the sigcontext
 	 * just indicates to userland that we were doing a transaction, but we
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index cd809eaa8b5c..761af4f0a632 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -12,16 +12,15 @@
 #include <asm/reg.h>
 
 #ifdef CONFIG_VSX
-/* See fpu.S, this is very similar but to save/restore checkpointed FPRs/VSRs */
-#define __SAVE_32FPRS_VSRS_TRANSACT(n,c,base)	\
+/* See fpu.S, this is borrowed from there */
+#define __SAVE_32FPRS_VSRS(n,c,base)		\
 BEGIN_FTR_SECTION				\
 	b	2f;				\
 END_FTR_SECTION_IFSET(CPU_FTR_VSX);		\
-	SAVE_32FPRS_TRANSACT(n,base);		\
+	SAVE_32FPRS(n,base);			\
 	b	3f;				\
-2:	SAVE_32VSRS_TRANSACT(n,c,base);		\
+2:	SAVE_32VSRS(n,c,base);			\
 3:
-/* ...and this is just plain borrowed from there. */
 #define __REST_32FPRS_VSRS(n,c,base)		\
 BEGIN_FTR_SECTION				\
 	b	2f;				\
@@ -31,11 +30,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);		\
 2:	REST_32VSRS(n,c,base);			\
 3:
 #else
-#define __SAVE_32FPRS_VSRS_TRANSACT(n,c,base) SAVE_32FPRS_TRANSACT(n, base)
-#define __REST_32FPRS_VSRS(n,c,base)	      REST_32FPRS(n, base)
+#define __SAVE_32FPRS_VSRS(n,c,base)	SAVE_32FPRS(n, base)
+#define __REST_32FPRS_VSRS(n,c,base)	REST_32FPRS(n, base)
 #endif
-#define SAVE_32FPRS_VSRS_TRANSACT(n,c,base) \
-	__SAVE_32FPRS_VSRS_TRANSACT(n,__REG_##c,__REG_##base)
+#define SAVE_32FPRS_VSRS(n,c,base) \
+	__SAVE_32FPRS_VSRS(n,__REG_##c,__REG_##base)
 #define REST_32FPRS_VSRS(n,c,base) \
 	__REST_32FPRS_VSRS(n,__REG_##c,__REG_##base)
 
@@ -157,10 +156,11 @@ _GLOBAL(tm_reclaim)
 	andis.		r0, r4, MSR_VEC@h
 	beq	dont_backup_vec
 
-	SAVE_32VRS_TRANSACT(0, r6, r3)	/* r6 scratch, r3 thread */
+	addi	r7, r3, THREAD_TRANSACT_VRSTATE
+	SAVE_32VRS(0, r6, r7)	/* r6 scratch, r7 transact vr state */
 	mfvscr	vr0
-	li	r6, THREAD_TRANSACT_VSCR
-	stvx	vr0, r3, r6
+	li	r6, VRSTATE_VSCR
+	stvx	vr0, r7, r6
 dont_backup_vec:
 	mfspr	r0, SPRN_VRSAVE
 	std	r0, THREAD_TRANSACT_VRSAVE(r3)
@@ -168,10 +168,11 @@ dont_backup_vec:
 	andi.	r0, r4, MSR_FP
 	beq	dont_backup_fp
 
-	SAVE_32FPRS_VSRS_TRANSACT(0, R6, R3)	/* r6 scratch, r3 thread */
+	addi	r7, r3, THREAD_TRANSACT_FPSTATE
+	SAVE_32FPRS_VSRS(0, R6, R7)	/* r6 scratch, r7 transact fp state */
 
 	mffs    fr0
-	stfd    fr0,THREAD_TRANSACT_FPSCR(r3)
+	stfd    fr0,FPSTATE_FPSCR(r7)
 
 dont_backup_fp:
 	/* The moment we treclaim, ALL of our GPRs will switch
@@ -358,10 +359,11 @@ _GLOBAL(tm_recheckpoint)
 	andis.	r0, r4, MSR_VEC@h
 	beq	dont_restore_vec
 
-	li	r5, THREAD_VSCR
-	lvx	vr0, r3, r5
+	addi	r8, r3, THREAD_VRSTATE
+	li	r5, VRSTATE_VSCR
+	lvx	vr0, r8, r5
 	mtvscr	vr0
-	REST_32VRS(0, r5, r3)			/* r5 scratch, r3 THREAD ptr */
+	REST_32VRS(0, r5, r8)			/* r5 scratch, r8 ptr */
 dont_restore_vec:
 	ld	r5, THREAD_VRSAVE(r3)
 	mtspr	SPRN_VRSAVE, r5
@@ -370,9 +372,10 @@ dont_restore_vec:
 	andi.	r0, r4, MSR_FP
 	beq	dont_restore_fp
 
-	lfd	fr0, THREAD_FPSCR(r3)
+	addi	r8, r3, THREAD_FPSTATE
+	lfd	fr0, FPSTATE_FPSCR(r8)
 	MTFSF_L(fr0)
-	REST_32FPRS_VSRS(0, R4, R3)
+	REST_32FPRS_VSRS(0, R4, R8)
 
 dont_restore_fp:
 	mtmsr	r6				/* FP/Vec off again! */
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index f783c932faeb..f0a6814007a5 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -816,7 +816,7 @@ static void parse_fpe(struct pt_regs *regs)
 
 	flush_fp_to_thread(current);
 
-	code = __parse_fpscr(current->thread.fpscr.val);
+	code = __parse_fpscr(current->thread.fp_state.fpscr);
 
 	_exception(SIGFPE, regs, code, regs->nip);
 }
@@ -1069,7 +1069,7 @@ static int emulate_math(struct pt_regs *regs)
 		return 0;
 	case 1: {
 			int code = 0;
-			code = __parse_fpscr(current->thread.fpscr.val);
+			code = __parse_fpscr(current->thread.fp_state.fpscr);
 			_exception(SIGFPE, regs, code, regs->nip);
 			return 0;
 		}
@@ -1371,8 +1371,6 @@ void facility_unavailable_exception(struct pt_regs *regs)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 
-extern void do_load_up_fpu(struct pt_regs *regs);
-
 void fp_unavailable_tm(struct pt_regs *regs)
 {
 	/* Note:  This does not handle any kind of FP laziness. */
@@ -1403,8 +1401,6 @@ void fp_unavailable_tm(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_ALTIVEC
-extern void do_load_up_altivec(struct pt_regs *regs);
-
 void altivec_unavailable_tm(struct pt_regs *regs)
 {
 	/* See the comments in fp_unavailable_tm().  This function operates
@@ -1634,7 +1630,7 @@ void altivec_assist_exception(struct pt_regs *regs)
 		/* XXX quick hack for now: set the non-Java bit in the VSCR */
 		printk_ratelimited(KERN_ERR "Unrecognized altivec instruction "
 				   "in %s at %lx\n", current->comm, regs->nip);
-		current->thread.vscr.u[3] |= 0x10000;
+		current->thread.vr_state.vscr.u[3] |= 0x10000;
 	}
 }
 #endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c
index 604d0947cb20..c4bfadb2606b 100644
--- a/arch/powerpc/kernel/vecemu.c
+++ b/arch/powerpc/kernel/vecemu.c
@@ -271,7 +271,7 @@ int emulate_altivec(struct pt_regs *regs)
 	vb = (instr >> 11) & 0x1f;
 	vc = (instr >> 6) & 0x1f;
 
-	vrs = current->thread.vr;
+	vrs = current->thread.vr_state.vr;
 	switch (instr & 0x3f) {
 	case 10:
 		switch (vc) {
@@ -320,12 +320,12 @@ int emulate_altivec(struct pt_regs *regs)
 		case 14:	/* vctuxs */
 			for (i = 0; i < 4; ++i)
 				vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
-						&current->thread.vscr.u[3]);
+					&current->thread.vr_state.vscr.u[3]);
 			break;
 		case 15:	/* vctsxs */
 			for (i = 0; i < 4; ++i)
 				vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
-						&current->thread.vscr.u[3]);
+					&current->thread.vr_state.vscr.u[3]);
 			break;
 		default:
 			return -EINVAL;
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 9e20999aaef2..a48df870b696 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -8,29 +8,6 @@
 #include <asm/ptrace.h>
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Wrapper to call load_up_altivec from C.
- * void do_load_up_altivec(struct pt_regs *regs);
- */
-_GLOBAL(do_load_up_altivec)
-	mflr	r0
-	std	r0, 16(r1)
-	stdu	r1, -112(r1)
-
-	subi	r6, r3, STACK_FRAME_OVERHEAD
-	/* load_up_altivec expects r12=MSR, r13=PACA, and returns
-	 * with r12 = new MSR.
-	 */
-	ld	r12,_MSR(r6)
-	GET_PACA(r13)
-	bl	load_up_altivec
-	std	r12,_MSR(r6)
-
-	ld	r0, 112+16(r1)
-	addi	r1, r1, 112
-	mtlr	r0
-	blr
-
 /* void do_load_up_transact_altivec(struct thread_struct *thread)
  *
  * This is similar to load_up_altivec but for the transactional version of the
@@ -46,10 +23,11 @@ _GLOBAL(do_load_up_transact_altivec)
 	li	r4,1
 	stw	r4,THREAD_USED_VR(r3)
 
-	li	r10,THREAD_TRANSACT_VSCR
+	li	r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR
 	lvx	vr0,r10,r3
 	mtvscr	vr0
-	REST_32VRS_TRANSACT(0,r4,r3)
+	addi	r10,r3,THREAD_TRANSACT_VRSTATE
+	REST_32VRS(0,r4,r10)
 
 	/* Disable VEC again. */
 	MTMSRD(r6)
@@ -59,7 +37,6 @@ _GLOBAL(do_load_up_transact_altivec)
 #endif
 
 /*
- * load_up_altivec(unused, unused, tsk)
  * Disable VMX for the task which had it previously,
  * and save its vector registers in its thread_struct.
  * Enables the VMX for use in the kernel on return.
@@ -90,10 +67,11 @@ _GLOBAL(load_up_altivec)
 	/* Save VMX state to last_task_used_altivec's THREAD struct */
 	toreal(r4)
 	addi	r4,r4,THREAD
-	SAVE_32VRS(0,r5,r4)
+	addi	r7,r4,THREAD_VRSTATE
+	SAVE_32VRS(0,r5,r7)
 	mfvscr	vr0
-	li	r10,THREAD_VSCR
-	stvx	vr0,r10,r4
+	li	r10,VRSTATE_VSCR
+	stvx	vr0,r10,r7
 	/* Disable VMX for last_task_used_altivec */
 	PPC_LL	r5,PT_REGS(r4)
 	toreal(r5)
@@ -125,12 +103,13 @@ _GLOBAL(load_up_altivec)
 	oris	r12,r12,MSR_VEC@h
 	std	r12,_MSR(r1)
 #endif
+	addi	r7,r5,THREAD_VRSTATE
 	li	r4,1
-	li	r10,THREAD_VSCR
+	li	r10,VRSTATE_VSCR
 	stw	r4,THREAD_USED_VR(r5)
-	lvx	vr0,r10,r5
+	lvx	vr0,r10,r7
 	mtvscr	vr0
-	REST_32VRS(0,r4,r5)
+	REST_32VRS(0,r4,r7)
 #ifndef CONFIG_SMP
 	/* Update last_task_used_altivec to 'current' */
 	subi	r4,r5,THREAD		/* Back to 'current' */
@@ -165,12 +144,13 @@ _GLOBAL(giveup_altivec)
 	PPC_LCMPI	0,r3,0
 	beqlr				/* if no previous owner, done */
 	addi	r3,r3,THREAD		/* want THREAD of task */
+	addi	r7,r3,THREAD_VRSTATE
 	PPC_LL	r5,PT_REGS(r3)
 	PPC_LCMPI	0,r5,0
-	SAVE_32VRS(0,r4,r3)
+	SAVE_32VRS(0,r4,r7)
 	mfvscr	vr0
-	li	r4,THREAD_VSCR
-	stvx	vr0,r4,r3
+	li	r4,VRSTATE_VSCR
+	stvx	vr0,r4,r7
 	beq	1f
 	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
 #ifdef CONFIG_VSX
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 27db1e665959..c0b48f96a91c 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -444,7 +444,7 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 #ifdef CONFIG_VSX
 	u64 *vcpu_vsx = vcpu->arch.vsr;
 #endif
-	u64 *thread_fpr = (u64*)t->fpr;
+	u64 *thread_fpr = &t->fp_state.fpr[0][0];
 	int i;
 
 	/*
@@ -466,14 +466,14 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 		/*
 		 * Note that on CPUs with VSX, giveup_fpu stores
 		 * both the traditional FP registers and the added VSX
-		 * registers into thread.fpr[].
+		 * registers into thread.fp_state.fpr[].
 		 */
 		if (current->thread.regs->msr & MSR_FP)
 			giveup_fpu(current);
 		for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
 			vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
 
-		vcpu->arch.fpscr = t->fpscr.val;
+		vcpu->arch.fpscr = t->fp_state.fpscr;
 
 #ifdef CONFIG_VSX
 		if (cpu_has_feature(CPU_FTR_VSX))
@@ -486,8 +486,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 	if (msr & MSR_VEC) {
 		if (current->thread.regs->msr & MSR_VEC)
 			giveup_altivec(current);
-		memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
-		vcpu->arch.vscr = t->vscr;
+		memcpy(vcpu->arch.vr, t->vr_state.vr, sizeof(vcpu->arch.vr));
+		vcpu->arch.vscr = t->vr_state.vscr;
 	}
 #endif
 
@@ -539,7 +539,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 #ifdef CONFIG_VSX
 	u64 *vcpu_vsx = vcpu->arch.vsr;
 #endif
-	u64 *thread_fpr = (u64*)t->fpr;
+	u64 *thread_fpr = &t->fp_state.fpr[0][0];
 	int i;
 
 	/* When we have paired singles, we emulate in software */
@@ -584,15 +584,15 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 		for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
 			thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
 #endif
-		t->fpscr.val = vcpu->arch.fpscr;
+		t->fp_state.fpscr = vcpu->arch.fpscr;
 		t->fpexc_mode = 0;
 		kvmppc_load_up_fpu();
 	}
 
 	if (msr & MSR_VEC) {
 #ifdef CONFIG_ALTIVEC
-		memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
-		t->vscr = vcpu->arch.vscr;
+		memcpy(t->vr_state.vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
+		t->vr_state.vscr = vcpu->arch.vscr;
 		t->vrsave = -1;
 		kvmppc_load_up_altivec();
 #endif
@@ -1116,12 +1116,10 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	int ret;
-	double fpr[32][TS_FPRWIDTH];
-	unsigned int fpscr;
+	struct thread_fp_state fp;
 	int fpexc_mode;
 #ifdef CONFIG_ALTIVEC
-	vector128 vr[32];
-	vector128 vscr;
+	struct thread_vr_state vr;
 	unsigned long uninitialized_var(vrsave);
 	int used_vr;
 #endif
@@ -1153,8 +1151,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	/* Save FPU state in stack */
 	if (current->thread.regs->msr & MSR_FP)
 		giveup_fpu(current);
-	memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
-	fpscr = current->thread.fpscr.val;
+	fp = current->thread.fp_state;
 	fpexc_mode = current->thread.fpexc_mode;
 
 #ifdef CONFIG_ALTIVEC
@@ -1163,8 +1160,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	if (used_vr) {
 		if (current->thread.regs->msr & MSR_VEC)
 			giveup_altivec(current);
-		memcpy(vr, current->thread.vr, sizeof(current->thread.vr));
-		vscr = current->thread.vscr;
+		vr = current->thread.vr_state;
 		vrsave = current->thread.vrsave;
 	}
 #endif
@@ -1196,15 +1192,13 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	current->thread.regs->msr = ext_msr;
 
 	/* Restore FPU/VSX state from stack */
-	memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
-	current->thread.fpscr.val = fpscr;
+	current->thread.fp_state = fp;
 	current->thread.fpexc_mode = fpexc_mode;
 
 #ifdef CONFIG_ALTIVEC
 	/* Restore Altivec state from stack */
 	if (used_vr && current->thread.used_vr) {
-		memcpy(current->thread.vr, vr, sizeof(current->thread.vr));
-		current->thread.vscr = vscr;
+		current->thread.vr_state = vr;
 		current->thread.vrsave = vrsave;
 	}
 	current->thread.used_vr = used_vr;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 17722d82f1d1..5133199f6cb7 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -656,9 +656,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	int ret, s;
 #ifdef CONFIG_PPC_FPU
-	unsigned int fpscr;
+	struct thread_fp_state fp;
 	int fpexc_mode;
-	u64 fpr[32];
 #endif
 
 	if (!vcpu->arch.sane) {
@@ -677,13 +676,13 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 #ifdef CONFIG_PPC_FPU
 	/* Save userspace FPU state in stack */
 	enable_kernel_fp();
-	memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
-	fpscr = current->thread.fpscr.val;
+	fp = current->thread.fp_state;
 	fpexc_mode = current->thread.fpexc_mode;
 
 	/* Restore guest FPU state to thread */
-	memcpy(current->thread.fpr, vcpu->arch.fpr, sizeof(vcpu->arch.fpr));
-	current->thread.fpscr.val = vcpu->arch.fpscr;
+	memcpy(current->thread.fp_state.fpr, vcpu->arch.fpr,
+	       sizeof(vcpu->arch.fpr));
+	current->thread.fp_state.fpscr = vcpu->arch.fpscr;
 
 	/*
 	 * Since we can't trap on MSR_FP in GS-mode, we consider the guest
@@ -709,12 +708,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	vcpu->fpu_active = 0;
 
 	/* Save guest FPU state from thread */
-	memcpy(vcpu->arch.fpr, current->thread.fpr, sizeof(vcpu->arch.fpr));
-	vcpu->arch.fpscr = current->thread.fpscr.val;
+	memcpy(vcpu->arch.fpr, current->thread.fp_state.fpr,
+	       sizeof(vcpu->arch.fpr));
+	vcpu->arch.fpscr = current->thread.fp_state.fpscr;
 
 	/* Restore userspace FPU state from stack */
-	memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
-	current->thread.fpscr.val = fpscr;
+	current->thread.fp_state = fp;
 	current->thread.fpexc_mode = fpexc_mode;
 #endif
 
-- 
cgit v1.2.3


From 18461960cbf50bf345ef0667d45d5f64de8fb893 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 10 Sep 2013 20:21:10 +1000
Subject: powerpc: Provide for giveup_fpu/altivec to save state in alternate
 location

This provides a facility which is intended for use by KVM, where the
contents of the FP/VSX and VMX (Altivec) registers can be saved away
to somewhere other than the thread_struct when kernel code wants to
use floating point or VMX instructions.  This is done by providing a
pointer in the thread_struct to indicate where the state should be
saved to.  The giveup_fpu() and giveup_altivec() functions test these
pointers and save state to the indicated location if they are non-NULL.
Note that the MSR_FP/VEC bits in task->thread.regs->msr are still used
to indicate whether the CPU register state is live, even when an
alternate save location is being used.

This also provides load_fp_state() and load_vr_state() functions, which
load up FP/VSX and VMX state from memory into the CPU registers, and
corresponding store_fp_state() and store_vr_state() functions, which
store FP/VSX and VMX state into memory from the CPU registers.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/processor.h |  7 +++++++
 arch/powerpc/kernel/asm-offsets.c    |  2 ++
 arch/powerpc/kernel/fpu.S            | 25 ++++++++++++++++++++++++-
 arch/powerpc/kernel/ppc_ksyms.c      |  4 ++++
 arch/powerpc/kernel/process.c        |  7 +++++++
 arch/powerpc/kernel/vector.S         | 29 +++++++++++++++++++++++++++--
 6 files changed, 71 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index afe695e9feb8..ea88e7bd4a34 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -211,6 +211,7 @@ struct thread_struct {
 #endif
 #endif
 	struct thread_fp_state	fp_state;
+	struct thread_fp_state	*fp_save_area;
 	int		fpexc_mode;	/* floating-point exception mode */
 	unsigned int	align_ctl;	/* alignment handling control */
 #ifdef CONFIG_PPC64
@@ -229,6 +230,7 @@ struct thread_struct {
 	unsigned long	trap_nr;	/* last trap # on this thread */
 #ifdef CONFIG_ALTIVEC
 	struct thread_vr_state vr_state;
+	struct thread_vr_state *vr_save_area;
 	unsigned long	vrsave;
 	int		used_vr;	/* set if process has used altivec */
 #endif /* CONFIG_ALTIVEC */
@@ -357,6 +359,11 @@ extern int set_endian(struct task_struct *tsk, unsigned int val);
 extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr);
 extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
 
+extern void load_fp_state(struct thread_fp_state *fp);
+extern void store_fp_state(struct thread_fp_state *fp);
+extern void load_vr_state(struct thread_vr_state *vr);
+extern void store_vr_state(struct thread_vr_state *vr);
+
 static inline unsigned int __unpack_fe01(unsigned long msr_bits)
 {
 	return ((msr_bits & MSR_FE0) >> 10) | ((msr_bits & MSR_FE1) >> 8);
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8d27b61c95b9..6278edddc3f8 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -91,9 +91,11 @@ int main(void)
 #endif
 	DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode));
 	DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state));
+	DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area));
 	DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr));
 #ifdef CONFIG_ALTIVEC
 	DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state));
+	DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area));
 	DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
 	DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
 	DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr));
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 34b96e6d2f0d..4dca05e91e95 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -80,6 +80,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	blr
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
+/*
+ * Load state from memory into FP registers including FPSCR.
+ * Assumes the caller has enabled FP in the MSR.
+ */
+_GLOBAL(load_fp_state)
+	lfd	fr0,FPSTATE_FPSCR(r3)
+	MTFSF_L(fr0)
+	REST_32FPVSRS(0, R4, R3)
+	blr
+
+/*
+ * Store FP state into memory, including FPSCR
+ * Assumes the caller has enabled FP in the MSR.
+ */
+_GLOBAL(store_fp_state)
+	SAVE_32FPVSRS(0, R4, R3)
+	mffs	fr0
+	stfd	fr0,FPSTATE_FPSCR(r3)
+	blr
+
 /*
  * This task wants to use the FPU now.
  * On UP, disable FP for the task which had the FPU previously,
@@ -172,9 +192,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	PPC_LCMPI	0,r3,0
 	beqlr-				/* if no previous owner, done */
 	addi	r3,r3,THREAD	        /* want THREAD of task */
+	PPC_LL	r6,THREAD_FPSAVEAREA(r3)
 	PPC_LL	r5,PT_REGS(r3)
-	PPC_LCMPI	0,r5,0
+	PPC_LCMPI	0,r6,0
+	bne	2f
 	addi	r6,r3,THREAD_FPSTATE
+2:	PPC_LCMPI	0,r5,0
 	SAVE_32FPVSRS(0, R4, R6)
 	mffs	fr0
 	stfd	fr0,FPSTATE_FPSCR(r6)
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 21646dbe1bb3..56a4bec1b11a 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -98,9 +98,13 @@ EXPORT_SYMBOL(start_thread);
 
 #ifdef CONFIG_PPC_FPU
 EXPORT_SYMBOL(giveup_fpu);
+EXPORT_SYMBOL(load_fp_state);
+EXPORT_SYMBOL(store_fp_state);
 #endif
 #ifdef CONFIG_ALTIVEC
 EXPORT_SYMBOL(giveup_altivec);
+EXPORT_SYMBOL(load_vr_state);
+EXPORT_SYMBOL(store_vr_state);
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_VSX
 EXPORT_SYMBOL(giveup_vsx);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 7a281416affb..8649a3d629e1 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1008,6 +1008,11 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	p->thread.ptrace_bps[0] = NULL;
 #endif
 
+	p->thread.fp_save_area = NULL;
+#ifdef CONFIG_ALTIVEC
+	p->thread.vr_save_area = NULL;
+#endif
+
 #ifdef CONFIG_PPC_STD_MMU_64
 	if (mmu_has_feature(MMU_FTR_SLB)) {
 		unsigned long sp_vsid;
@@ -1114,9 +1119,11 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 	current->thread.used_vsr = 0;
 #endif
 	memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
+	current->thread.fp_save_area = NULL;
 #ifdef CONFIG_ALTIVEC
 	memset(&current->thread.vr_state, 0, sizeof(current->thread.vr_state));
 	current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */
+	current->thread.vr_save_area = NULL;
 	current->thread.vrsave = 0;
 	current->thread.used_vr = 0;
 #endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index a48df870b696..eacda4eea2d7 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -36,6 +36,28 @@ _GLOBAL(do_load_up_transact_altivec)
 	blr
 #endif
 
+/*
+ * Load state from memory into VMX registers including VSCR.
+ * Assumes the caller has enabled VMX in the MSR.
+ */
+_GLOBAL(load_vr_state)
+	li	r4,VRSTATE_VSCR
+	lvx	vr0,r4,r3
+	mtvscr	vr0
+	REST_32VRS(0,r4,r3)
+	blr
+
+/*
+ * Store VMX state into memory, including VSCR.
+ * Assumes the caller has enabled VMX in the MSR.
+ */
+_GLOBAL(store_vr_state)
+	SAVE_32VRS(0, r4, r3)
+	mfvscr	vr0
+	li	r4, VRSTATE_VSCR
+	stvx	vr0, r4, r3
+	blr
+
 /*
  * Disable VMX for the task which had it previously,
  * and save its vector registers in its thread_struct.
@@ -144,9 +166,12 @@ _GLOBAL(giveup_altivec)
 	PPC_LCMPI	0,r3,0
 	beqlr				/* if no previous owner, done */
 	addi	r3,r3,THREAD		/* want THREAD of task */
-	addi	r7,r3,THREAD_VRSTATE
+	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
 	PPC_LL	r5,PT_REGS(r3)
-	PPC_LCMPI	0,r5,0
+	PPC_LCMPI	0,r7,0
+	bne	2f
+	addi	r7,r3,THREAD_VRSTATE
+2:	PPC_LCMPI	0,r5,0
 	SAVE_32VRS(0,r4,r7)
 	mfvscr	vr0
 	li	r4,VRSTATE_VSCR
-- 
cgit v1.2.3


From 2c5350e934501f1af8010c608d8dbf72ad25fdc6 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Wed, 2 Oct 2013 14:22:33 -0700
Subject: KVM: PPC: Get rid of KVM_HPAGE defines

Now that the main kvm code relying on these defines has been moved to
the x86 specific part of the world, we can get rid of these.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/powerpc/include/asm/kvm_host.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 33283532e9d8..0866230b7c2d 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -63,11 +63,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
 #endif
 
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x)	0
-#define KVM_NR_PAGE_SIZES	1
-#define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
-
 #define HPTEG_CACHE_NUM			(1 << 15)
 #define HPTEG_HASH_BITS_PTE		13
 #define HPTEG_HASH_BITS_PTE_LONG	12
-- 
cgit v1.2.3


From 9863c28a2af90a56c088f5f6288d7f6d2c923c14 Mon Sep 17 00:00:00 2001
From: James Yang <James.Yang@freescale.com>
Date: Wed, 3 Jul 2013 16:26:47 -0500
Subject: powerpc: Emulate sync instruction variants

Reserved fields of the sync instruction have been used for other
instructions (e.g. lwsync).  On processors that do not support variants
of the sync instruction, emulate it by executing a sync to subsume the
effect of the intended instruction.

Signed-off-by: James Yang <James.Yang@freescale.com>
[scottwood@freescale.com: whitespace and subject line fix]
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/include/asm/ppc-opcode.h | 2 ++
 arch/powerpc/kernel/traps.c           | 7 +++++++
 2 files changed, 9 insertions(+)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index ad5fcf51b252..442edee4b6aa 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -143,6 +143,8 @@
 #define PPC_INST_LSWX			0x7c00042a
 #define PPC_INST_LWARX			0x7c000028
 #define PPC_INST_LWSYNC			0x7c2004ac
+#define PPC_INST_SYNC			0x7c0004ac
+#define PPC_INST_SYNC_MASK		0xfc0007fe
 #define PPC_INST_LXVD2X			0x7c000698
 #define PPC_INST_MCRXR			0x7c000400
 #define PPC_INST_MCRXR_MASK		0xfc0007fe
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index f0a6814007a5..36a1f95a2a36 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1018,6 +1018,13 @@ static int emulate_instruction(struct pt_regs *regs)
 		return emulate_isel(regs, instword);
 	}
 
+	/* Emulate sync instruction variants */
+	if ((instword & PPC_INST_SYNC_MASK) == PPC_INST_SYNC) {
+		PPC_WARN_EMULATED(sync, regs);
+		asm volatile("sync");
+		return 0;
+	}
+
 #ifdef CONFIG_PPC64
 	/* Emulate the mfspr rD, DSCR. */
 	if ((((instword & PPC_INST_MFSPR_DSCR_USER_MASK) ==
-- 
cgit v1.2.3


From 3b7834743f9492e3509930feb4ca47135905e640 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Tue, 3 Sep 2013 11:13:12 +1000
Subject: KVM: PPC: Book3S HV: Reserve POWER8 space in get/set_one_reg

This reserves space in get/set_one_reg ioctl for the extra guest state
needed for POWER8.  It doesn't implement these at all, it just reserves
them so that the ABI is defined now.

A few things to note here:

- This adds *a lot* of state for transactional memory.  With TM suspend
  mode, this is unavoidable: you can't simply roll back all transactions
  and store only the checkpointed state.  I've added this all to
  get/set_one_reg (including GPRs) rather than creating a new ioctl
  which returns a struct kvm_regs like KVM_GET_REGS does.  This means
  that if we need to extract the TM state, we are going to need a bucket
  load of IOCTLs.  Hopefully most of the time this will not be needed as
  we can look at the MSR to see if TM is active and only grab them when
  needed.  If this becomes a bottleneck in future we can add another
  ioctl to grab all this state in one go.

- The TM state is offset by 0x80000000.

- For TM, I've done away with VMX and FP and created a single 64x128 bit
  VSX register space.

- I've left a space of 1 (at 0x9c) since Paulus needs to add a value
  which applies to POWER7 as well.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt   | 39 +++++++++++++++++++++++++++
 arch/powerpc/include/uapi/asm/kvm.h | 54 +++++++++++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+)

(limited to 'arch/powerpc/include')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index a89a5ee0b940..354a51ba456b 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1810,6 +1810,45 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_TLB3PS	| 32
   PPC   | KVM_REG_PPC_EPTCFG	| 32
   PPC   | KVM_REG_PPC_ICP_STATE | 64
+  PPC   | KVM_REG_PPC_SPMC1	| 32
+  PPC   | KVM_REG_PPC_SPMC2	| 32
+  PPC   | KVM_REG_PPC_IAMR	| 64
+  PPC   | KVM_REG_PPC_TFHAR	| 64
+  PPC   | KVM_REG_PPC_TFIAR	| 64
+  PPC   | KVM_REG_PPC_TEXASR	| 64
+  PPC   | KVM_REG_PPC_FSCR	| 64
+  PPC   | KVM_REG_PPC_PSPB	| 32
+  PPC   | KVM_REG_PPC_EBBHR	| 64
+  PPC   | KVM_REG_PPC_EBBRR	| 64
+  PPC   | KVM_REG_PPC_BESCR	| 64
+  PPC   | KVM_REG_PPC_TAR	| 64
+  PPC   | KVM_REG_PPC_DPDES	| 64
+  PPC   | KVM_REG_PPC_DAWR	| 64
+  PPC   | KVM_REG_PPC_DAWRX	| 64
+  PPC   | KVM_REG_PPC_CIABR	| 64
+  PPC   | KVM_REG_PPC_IC	| 64
+  PPC   | KVM_REG_PPC_VTB	| 64
+  PPC   | KVM_REG_PPC_CSIGR	| 64
+  PPC   | KVM_REG_PPC_TACR	| 64
+  PPC   | KVM_REG_PPC_TCSCR	| 64
+  PPC   | KVM_REG_PPC_PID	| 64
+  PPC   | KVM_REG_PPC_ACOP	| 64
+  PPC   | KVM_REG_PPC_TM_GPR0	| 64
+          ...
+  PPC   | KVM_REG_PPC_TM_GPR31	| 64
+  PPC   | KVM_REG_PPC_TM_VSR0	| 128
+          ...
+  PPC   | KVM_REG_PPC_TM_VSR63	| 128
+  PPC   | KVM_REG_PPC_TM_CR	| 64
+  PPC   | KVM_REG_PPC_TM_LR	| 64
+  PPC   | KVM_REG_PPC_TM_CTR	| 64
+  PPC   | KVM_REG_PPC_TM_FPSCR	| 64
+  PPC   | KVM_REG_PPC_TM_AMR	| 64
+  PPC   | KVM_REG_PPC_TM_PPR	| 64
+  PPC   | KVM_REG_PPC_TM_VRSAVE	| 64
+  PPC   | KVM_REG_PPC_TM_VSCR	| 32
+  PPC   | KVM_REG_PPC_TM_DSCR	| 64
+  PPC   | KVM_REG_PPC_TM_TAR	| 64
 
 ARM registers are mapped using the lower 32 bits.  The upper 16 of that
 is the register group type, or coprocessor number:
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 0fb1a6e9ff90..7ed41c0b4045 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -429,6 +429,11 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_MMCR0	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10)
 #define KVM_REG_PPC_MMCR1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11)
 #define KVM_REG_PPC_MMCRA	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12)
+#define KVM_REG_PPC_MMCR2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x13)
+#define KVM_REG_PPC_MMCRS	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x14)
+#define KVM_REG_PPC_SIAR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x15)
+#define KVM_REG_PPC_SDAR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x16)
+#define KVM_REG_PPC_SIER	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x17)
 
 #define KVM_REG_PPC_PMC1	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18)
 #define KVM_REG_PPC_PMC2	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19)
@@ -499,6 +504,55 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_TLB3PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
 #define KVM_REG_PPC_EPTCFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
 
+/* POWER8 registers */
+#define KVM_REG_PPC_SPMC1	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9d)
+#define KVM_REG_PPC_SPMC2	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9e)
+#define KVM_REG_PPC_IAMR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9f)
+#define KVM_REG_PPC_TFHAR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa0)
+#define KVM_REG_PPC_TFIAR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa1)
+#define KVM_REG_PPC_TEXASR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa2)
+#define KVM_REG_PPC_FSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa3)
+#define KVM_REG_PPC_PSPB	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xa4)
+#define KVM_REG_PPC_EBBHR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa5)
+#define KVM_REG_PPC_EBBRR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa6)
+#define KVM_REG_PPC_BESCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa7)
+#define KVM_REG_PPC_TAR		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa8)
+#define KVM_REG_PPC_DPDES	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa9)
+#define KVM_REG_PPC_DAWR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaa)
+#define KVM_REG_PPC_DAWRX	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xab)
+#define KVM_REG_PPC_CIABR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xac)
+#define KVM_REG_PPC_IC		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xad)
+#define KVM_REG_PPC_VTB		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xae)
+#define KVM_REG_PPC_CSIGR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaf)
+#define KVM_REG_PPC_TACR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb0)
+#define KVM_REG_PPC_TCSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb1)
+#define KVM_REG_PPC_PID		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb2)
+#define KVM_REG_PPC_ACOP	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3)
+
+/* Transactional Memory checkpointed state:
+ * This is all GPRs, all VSX regs and a subset of SPRs
+ */
+#define KVM_REG_PPC_TM		(KVM_REG_PPC | 0x80000000)
+/* TM GPRs */
+#define KVM_REG_PPC_TM_GPR0	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0)
+#define KVM_REG_PPC_TM_GPR(n)	(KVM_REG_PPC_TM_GPR0 + (n))
+#define KVM_REG_PPC_TM_GPR31	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x1f)
+/* TM VSX */
+#define KVM_REG_PPC_TM_VSR0	(KVM_REG_PPC_TM | KVM_REG_SIZE_U128 | 0x20)
+#define KVM_REG_PPC_TM_VSR(n)	(KVM_REG_PPC_TM_VSR0 + (n))
+#define KVM_REG_PPC_TM_VSR63	(KVM_REG_PPC_TM | KVM_REG_SIZE_U128 | 0x5f)
+/* TM SPRS */
+#define KVM_REG_PPC_TM_CR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x60)
+#define KVM_REG_PPC_TM_LR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x61)
+#define KVM_REG_PPC_TM_CTR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x62)
+#define KVM_REG_PPC_TM_FPSCR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x63)
+#define KVM_REG_PPC_TM_AMR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x64)
+#define KVM_REG_PPC_TM_PPR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x65)
+#define KVM_REG_PPC_TM_VRSAVE	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x66)
+#define KVM_REG_PPC_TM_VSCR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67)
+#define KVM_REG_PPC_TM_DSCR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68)
+#define KVM_REG_PPC_TM_TAR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69)
+
 /* PPC64 eXternal Interrupt Controller Specification */
 #define KVM_DEV_XICS_GRP_SOURCES	1	/* 64-bit source attributes */
 
-- 
cgit v1.2.3


From 14941789f2a13cd89e2dd567c4f708e571ab714e Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 6 Sep 2013 13:11:18 +1000
Subject: KVM: PPC: Book3S HV: Save/restore SIAR and SDAR along with other PMU
 registers

Currently we are not saving and restoring the SIAR and SDAR registers in
the PMU (performance monitor unit) on guest entry and exit.  The result
is that performance monitoring tools in the guest could get false
information about where a program was executing and what data it was
accessing at the time of a performance monitor interrupt.  This fixes
it by saving and restoring these registers along with the other PMU
registers on guest entry/exit.

This also provides a way for userspace to access these values for a
vcpu via the one_reg interface.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_host.h     |  2 ++
 arch/powerpc/kernel/asm-offsets.c       |  2 ++
 arch/powerpc/kvm/book3s_hv.c            | 12 ++++++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  8 ++++++++
 4 files changed, 24 insertions(+)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 0866230b7c2d..d9b21af62610 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -493,6 +493,8 @@ struct kvm_vcpu_arch {
 
 	u64 mmcr[3];
 	u32 pmc[8];
+	u64 siar;
+	u64 sdar;
 
 #ifdef CONFIG_KVM_EXIT_TIMING
 	struct mutex exit_timing_lock;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index d8958be5f31a..19e699d747b7 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -508,6 +508,8 @@ int main(void)
 	DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
 	DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
 	DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
+	DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar));
+	DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar));
 	DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
 	DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
 	DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 62a2b5ab08ed..45a9b876b0a0 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -749,6 +749,12 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 		i = id - KVM_REG_PPC_PMC1;
 		*val = get_reg_val(id, vcpu->arch.pmc[i]);
 		break;
+	case KVM_REG_PPC_SIAR:
+		*val = get_reg_val(id, vcpu->arch.siar);
+		break;
+	case KVM_REG_PPC_SDAR:
+		*val = get_reg_val(id, vcpu->arch.sdar);
+		break;
 #ifdef CONFIG_VSX
 	case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
 		if (cpu_has_feature(CPU_FTR_VSX)) {
@@ -833,6 +839,12 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 		i = id - KVM_REG_PPC_PMC1;
 		vcpu->arch.pmc[i] = set_reg_val(id, *val);
 		break;
+	case KVM_REG_PPC_SIAR:
+		vcpu->arch.siar = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_SDAR:
+		vcpu->arch.sdar = set_reg_val(id, *val);
+		break;
 #ifdef CONFIG_VSX
 	case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
 		if (cpu_has_feature(CPU_FTR_VSX)) {
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 294b7af28cdd..541aea0ce91a 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -200,8 +200,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	ld	r3, VCPU_MMCR(r4)
 	ld	r5, VCPU_MMCR + 8(r4)
 	ld	r6, VCPU_MMCR + 16(r4)
+	ld	r7, VCPU_SIAR(r4)
+	ld	r8, VCPU_SDAR(r4)
 	mtspr	SPRN_MMCR1, r5
 	mtspr	SPRN_MMCRA, r6
+	mtspr	SPRN_SIAR, r7
+	mtspr	SPRN_SDAR, r8
 	mtspr	SPRN_MMCR0, r3
 	isync
 
@@ -1134,9 +1138,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	std	r3, VCPU_MMCR(r9)	/* if not, set saved MMCR0 to FC */
 	b	22f
 21:	mfspr	r5, SPRN_MMCR1
+	mfspr	r7, SPRN_SIAR
+	mfspr	r8, SPRN_SDAR
 	std	r4, VCPU_MMCR(r9)
 	std	r5, VCPU_MMCR + 8(r9)
 	std	r6, VCPU_MMCR + 16(r9)
+	std	r7, VCPU_SIAR(r9)
+	std	r8, VCPU_SDAR(r9)
 	mfspr	r3, SPRN_PMC1
 	mfspr	r4, SPRN_PMC2
 	mfspr	r5, SPRN_PMC3
-- 
cgit v1.2.3


From 93b0f4dc29c5f077a1c97bd1d484147230c3779a Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 6 Sep 2013 13:17:46 +1000
Subject: KVM: PPC: Book3S HV: Implement timebase offset for guests

This allows guests to have a different timebase origin from the host.
This is needed for migration, where a guest can migrate from one host
to another and the two hosts might have a different timebase origin.
However, the timebase seen by the guest must not go backwards, and
should go forwards only by a small amount corresponding to the time
taken for the migration.

Therefore this provides a new per-vcpu value accessed via the one_reg
interface using the new KVM_REG_PPC_TB_OFFSET identifier.  This value
defaults to 0 and is not modified by KVM.  On entering the guest, this
value is added onto the timebase, and on exiting the guest, it is
subtracted from the timebase.

This is only supported for recent POWER hardware which has the TBU40
(timebase upper 40 bits) register.  Writing to the TBU40 register only
alters the upper 40 bits of the timebase, leaving the lower 24 bits
unchanged.  This provides a way to modify the timebase for guest
migration without disturbing the synchronization of the timebase
registers across CPU cores.  The kernel rounds up the value given
to a multiple of 2^24.

Timebase values stored in KVM structures (struct kvm_vcpu, struct
kvmppc_vcore, etc.) are stored as host timebase values.  The timebase
values in the dispatch trace log need to be guest timebase values,
however, since that is read directly by the guest.  This moves the
setting of vcpu->arch.dec_expires on guest exit to a point after we
have restored the host timebase so that vcpu->arch.dec_expires is a
host timebase value.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt       |  1 +
 arch/powerpc/include/asm/kvm_host.h     |  1 +
 arch/powerpc/include/asm/reg.h          |  1 +
 arch/powerpc/include/uapi/asm/kvm.h     |  3 ++
 arch/powerpc/kernel/asm-offsets.c       |  1 +
 arch/powerpc/kvm/book3s_hv.c            | 10 ++++++-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 50 +++++++++++++++++++++++++++------
 7 files changed, 57 insertions(+), 10 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 354a51ba456b..e43c6f14c225 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1810,6 +1810,7 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_TLB3PS	| 32
   PPC   | KVM_REG_PPC_EPTCFG	| 32
   PPC   | KVM_REG_PPC_ICP_STATE | 64
+  PPC   | KVM_REG_PPC_TB_OFFSET	| 64
   PPC   | KVM_REG_PPC_SPMC1	| 32
   PPC   | KVM_REG_PPC_SPMC2	| 32
   PPC   | KVM_REG_PPC_IAMR	| 64
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d9b21af62610..e4d67a606e43 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -289,6 +289,7 @@ struct kvmppc_vcore {
 	u64 stolen_tb;
 	u64 preempt_tb;
 	struct kvm_vcpu *runner;
+	u64 tb_offset;		/* guest timebase - host timebase */
 };
 
 #define VCORE_ENTRY_COUNT(vc)	((vc)->entry_exit_count & 0xff)
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 10d1ef016bf1..fd4db15e6f2a 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -243,6 +243,7 @@
 #define SPRN_TBRU	0x10D	/* Time Base Read Upper Register (user, R/O) */
 #define SPRN_TBWL	0x11C	/* Time Base Lower Register (super, R/W) */
 #define SPRN_TBWU	0x11D	/* Time Base Upper Register (super, R/W) */
+#define SPRN_TBU40	0x11E	/* Timebase upper 40 bits (hyper, R/W) */
 #define SPRN_SPURR	0x134	/* Scaled PURR */
 #define SPRN_HSPRG0	0x130	/* Hypervisor Scratch 0 */
 #define SPRN_HSPRG1	0x131	/* Hypervisor Scratch 1 */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 7ed41c0b4045..a8124fe93fb9 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -504,6 +504,9 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_TLB3PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
 #define KVM_REG_PPC_EPTCFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
 
+/* Timebase offset */
+#define KVM_REG_PPC_TB_OFFSET	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9c)
+
 /* POWER8 registers */
 #define KVM_REG_PPC_SPMC1	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9d)
 #define KVM_REG_PPC_SPMC2	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9e)
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 19e699d747b7..34d63d871917 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -523,6 +523,7 @@ int main(void)
 	DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
 	DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
 	DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
+	DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset));
 	DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
 			   offsetof(struct kvmppc_vcpu_book3s, vcpu));
 	DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 45a9b876b0a0..56f57af7e738 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -489,7 +489,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 	memset(dt, 0, sizeof(struct dtl_entry));
 	dt->dispatch_reason = 7;
 	dt->processor_id = vc->pcpu + vcpu->arch.ptid;
-	dt->timebase = now;
+	dt->timebase = now + vc->tb_offset;
 	dt->enqueue_to_dispatch_time = stolen;
 	dt->srr0 = kvmppc_get_pc(vcpu);
 	dt->srr1 = vcpu->arch.shregs.msr;
@@ -793,6 +793,9 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 		val->vpaval.length = vcpu->arch.dtl.len;
 		spin_unlock(&vcpu->arch.vpa_update_lock);
 		break;
+	case KVM_REG_PPC_TB_OFFSET:
+		*val = get_reg_val(id, vcpu->arch.vcore->tb_offset);
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -892,6 +895,11 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 		len -= len % sizeof(struct dtl_entry);
 		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
 		break;
+	case KVM_REG_PPC_TB_OFFSET:
+		/* round up to multiple of 2^24 */
+		vcpu->arch.vcore->tb_offset =
+			ALIGN(set_reg_val(id, *val), 1UL << 24);
+		break;
 	default:
 		r = -EINVAL;
 		break;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 541aea0ce91a..82b06dfe2d27 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -347,7 +347,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	bdnz	28b
 	ptesync
 
-22:	li	r0,1
+	/* Add timebase offset onto timebase */
+22:	ld	r8,VCORE_TB_OFFSET(r5)
+	cmpdi	r8,0
+	beq	37f
+	mftb	r6		/* current host timebase */
+	add	r8,r8,r6
+	mtspr	SPRN_TBU40,r8	/* update upper 40 bits */
+	mftb	r7		/* check if lower 24 bits overflowed */
+	clrldi	r6,r6,40
+	clrldi	r7,r7,40
+	cmpld	r7,r6
+	bge	37f
+	addis	r8,r8,0x100	/* if so, increment upper 40 bits */
+	mtspr	SPRN_TBU40,r8
+
+37:	li	r0,1
 	stb	r0,VCORE_IN_GUEST(r5)	/* signal secondaries to continue */
 	b	10f
 
@@ -782,13 +797,6 @@ ext_stash_for_host:
 ext_interrupt_to_host:
 
 guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
-	/* Save DEC */
-	mfspr	r5,SPRN_DEC
-	mftb	r6
-	extsw	r5,r5
-	add	r5,r5,r6
-	std	r5,VCPU_DEC_EXPIRES(r9)
-
 	/* Save more register state  */
 	mfdar	r6
 	mfdsisr	r7
@@ -958,7 +966,24 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	mtspr	SPRN_SDR1,r6		/* switch to partition page table */
 	mtspr	SPRN_LPID,r7
 	isync
-	li	r0,0
+
+	/* Subtract timebase offset from timebase */
+	ld	r8,VCORE_TB_OFFSET(r5)
+	cmpdi	r8,0
+	beq	17f
+	mftb	r6			/* current host timebase */
+	subf	r8,r8,r6
+	mtspr	SPRN_TBU40,r8		/* update upper 40 bits */
+	mftb	r7			/* check if lower 24 bits overflowed */
+	clrldi	r6,r6,40
+	clrldi	r7,r7,40
+	cmpld	r7,r6
+	bge	17f
+	addis	r8,r8,0x100		/* if so, increment upper 40 bits */
+	mtspr	SPRN_TBU40,r8
+
+	/* Signal secondary CPUs to continue */
+17:	li	r0,0
 	stb	r0,VCORE_IN_GUEST(r5)
 	lis	r8,0x7fff		/* MAX_INT@h */
 	mtspr	SPRN_HDEC,r8
@@ -1056,6 +1081,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 1:	addi	r8,r8,16
 	.endr
 
+	/* Save DEC */
+	mfspr	r5,SPRN_DEC
+	mftb	r6
+	extsw	r5,r5
+	add	r5,r5,r6
+	std	r5,VCPU_DEC_EXPIRES(r9)
+
 	/* Save and reset AMR and UAMOR before turning on the MMU */
 BEGIN_FTR_SECTION
 	mfspr	r5,SPRN_AMR
-- 
cgit v1.2.3


From c0867fd50943d71d9f0bc5078393ce4ec76b3923 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 6 Sep 2013 13:18:32 +1000
Subject: KVM: PPC: Book3S: Add GET/SET_ONE_REG interface for VRSAVE

The VRSAVE register value for a vcpu is accessible through the
GET/SET_SREGS interface for Book E processors, but not for Book 3S
processors.  In order to make this accessible for Book 3S processors,
this adds a new register identifier for GET/SET_ONE_REG, and adds
the code to implement it.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt   |  1 +
 arch/powerpc/include/uapi/asm/kvm.h |  2 ++
 arch/powerpc/kvm/book3s.c           | 10 ++++++++++
 3 files changed, 13 insertions(+)

(limited to 'arch/powerpc/include')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index e43c6f14c225..26fc37355dcb 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1834,6 +1834,7 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_TCSCR	| 64
   PPC   | KVM_REG_PPC_PID	| 64
   PPC   | KVM_REG_PPC_ACOP	| 64
+  PPC   | KVM_REG_PPC_VRSAVE	| 32
   PPC   | KVM_REG_PPC_TM_GPR0	| 64
           ...
   PPC   | KVM_REG_PPC_TM_GPR31	| 64
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index a8124fe93fb9..b98bf3f50527 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -532,6 +532,8 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_PID		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb2)
 #define KVM_REG_PPC_ACOP	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3)
 
+#define KVM_REG_PPC_VRSAVE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4)
+
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
  */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 700df6f1d32c..f97369dc457c 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -528,6 +528,9 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 			}
 			val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
 			break;
+		case KVM_REG_PPC_VRSAVE:
+			val = get_reg_val(reg->id, vcpu->arch.vrsave);
+			break;
 #endif /* CONFIG_ALTIVEC */
 		case KVM_REG_PPC_DEBUG_INST: {
 			u32 opcode = INS_TW;
@@ -605,6 +608,13 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 			}
 			vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
 			break;
+		case KVM_REG_PPC_VRSAVE:
+			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+				r = -ENXIO;
+				break;
+			}
+			vcpu->arch.vrsave = set_reg_val(reg->id, val);
+			break;
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_KVM_XICS
 		case KVM_REG_PPC_ICP_STATE:
-- 
cgit v1.2.3


From a0144e2a6b0b4a137a32f0102354782547bf0935 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 20 Sep 2013 14:52:38 +1000
Subject: KVM: PPC: Book3S HV: Store LPCR value for each virtual core

This adds the ability to have a separate LPCR (Logical Partitioning
Control Register) value relating to a guest for each virtual core,
rather than only having a single value for the whole VM.  This
corresponds to what real POWER hardware does, where there is a LPCR
per CPU thread but most of the fields are required to have the same
value on all active threads in a core.

The per-virtual-core LPCR can be read and written using the
GET/SET_ONE_REG interface.  Userspace can only modify the
following fields of the LPCR value:

DPFD	Default prefetch depth
ILE	Interrupt little-endian
TC	Translation control (secondary HPT hash group search disable)

We still maintain a per-VM default LPCR value in kvm->arch.lpcr, which
contains bits relating to memory management, i.e. the Virtualized
Partition Memory (VPM) bits and the bits relating to guest real mode.
When this default value is updated, the update needs to be propagated
to the per-vcore values, so we add a kvmppc_update_lpcr() helper to do
that.

Signed-off-by: Paul Mackerras <paulus@samba.org>
[agraf: fix whitespace]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt       |  1 +
 arch/powerpc/include/asm/kvm_book3s.h   |  2 +
 arch/powerpc/include/asm/kvm_host.h     |  1 +
 arch/powerpc/include/asm/reg.h          |  3 ++
 arch/powerpc/include/uapi/asm/kvm.h     |  1 +
 arch/powerpc/kernel/asm-offsets.c       |  1 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c     |  5 +--
 arch/powerpc/kvm/book3s_hv.c            | 73 +++++++++++++++++++++++++++------
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  5 ++-
 9 files changed, 75 insertions(+), 17 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 26fc37355dcb..387f4c7dad9f 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1835,6 +1835,7 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_PID	| 64
   PPC   | KVM_REG_PPC_ACOP	| 64
   PPC   | KVM_REG_PPC_VRSAVE	| 32
+  PPC   | KVM_REG_PPC_LPCR	| 64
   PPC   | KVM_REG_PPC_TM_GPR0	| 64
           ...
   PPC   | KVM_REG_PPC_TM_GPR31	| 64
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index fa19e2f1a874..14a47416bdd4 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -172,6 +172,8 @@ extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 			unsigned long *hpret);
 extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
 			struct kvm_memory_slot *memslot, unsigned long *map);
+extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
+			unsigned long mask);
 
 extern void kvmppc_entry_trampoline(void);
 extern void kvmppc_hv_entry_trampoline(void);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e4d67a606e43..6eabffcb1c3c 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -290,6 +290,7 @@ struct kvmppc_vcore {
 	u64 preempt_tb;
 	struct kvm_vcpu *runner;
 	u64 tb_offset;		/* guest timebase - host timebase */
+	ulong lpcr;
 };
 
 #define VCORE_ENTRY_COUNT(vc)	((vc)->entry_exit_count & 0xff)
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index fd4db15e6f2a..4bec4df3fb98 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -284,6 +284,7 @@
 #define   LPCR_ISL	(1ul << (63-2))
 #define   LPCR_VC_SH	(63-2)
 #define   LPCR_DPFD_SH	(63-11)
+#define   LPCR_DPFD	(7ul << LPCR_DPFD_SH)
 #define   LPCR_VRMASD	(0x1ful << (63-16))
 #define   LPCR_VRMA_L	(1ul << (63-12))
 #define   LPCR_VRMA_LP0	(1ul << (63-15))
@@ -300,6 +301,7 @@
 #define     LPCR_PECE2	0x00001000	/* machine check etc can cause exit */
 #define   LPCR_MER	0x00000800	/* Mediated External Exception */
 #define   LPCR_MER_SH	11
+#define   LPCR_TC      0x00000200	/* Translation control */
 #define   LPCR_LPES    0x0000000c
 #define   LPCR_LPES0   0x00000008      /* LPAR Env selector 0 */
 #define   LPCR_LPES1   0x00000004      /* LPAR Env selector 1 */
@@ -421,6 +423,7 @@
 #define	 HID4_RMLS2_SH	 (63 - 2)	/* Real mode limit bottom 2 bits */
 #define	 HID4_LPID5_SH	 (63 - 6)	/* partition ID bottom 4 bits */
 #define	 HID4_RMOR_SH	 (63 - 22)	/* real mode offset (16 bits) */
+#define  HID4_RMOR	 (0xFFFFul << HID4_RMOR_SH)
 #define  HID4_LPES1	 (1 << (63-57))	/* LPAR env. sel. bit 1 */
 #define  HID4_RMLS0_SH	 (63 - 58)	/* Real mode limit top bit */
 #define	 HID4_LPID1_SH	 0		/* partition ID top 2 bits */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index b98bf3f50527..e42127d1ae8e 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -533,6 +533,7 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_ACOP	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3)
 
 #define KVM_REG_PPC_VRSAVE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4)
+#define KVM_REG_PPC_LPCR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 34d63d871917..fd7513f8014b 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -524,6 +524,7 @@ int main(void)
 	DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
 	DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
 	DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset));
+	DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr));
 	DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
 			   offsetof(struct kvmppc_vcpu_book3s, vcpu));
 	DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 043eec8461e7..ccb89a048bf8 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1512,9 +1512,8 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 
 				kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
 					(VRMA_VSID << SLB_VSID_SHIFT_1T);
-				lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
-				lpcr |= senc << (LPCR_VRMASD_SH - 4);
-				kvm->arch.lpcr = lpcr;
+				lpcr = senc << (LPCR_VRMASD_SH - 4);
+				kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
 				rma_setup = 1;
 			}
 			++i;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index a010aa4cd026..36eb95cc48ae 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -195,7 +195,7 @@ void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
 		pr_err("  ESID = %.16llx VSID = %.16llx\n",
 		       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
 	pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
-	       vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1,
+	       vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1,
 	       vcpu->arch.last_inst);
 }
 
@@ -723,6 +723,21 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	u64 mask;
+
+	spin_lock(&vc->lock);
+	/*
+	 * Userspace can only modify DPFD (default prefetch depth),
+	 * ILE (interrupt little-endian) and TC (translation control).
+	 */
+	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
+	vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
+	spin_unlock(&vc->lock);
+}
+
 int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 {
 	int r = 0;
@@ -805,6 +820,9 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 	case KVM_REG_PPC_TB_OFFSET:
 		*val = get_reg_val(id, vcpu->arch.vcore->tb_offset);
 		break;
+	case KVM_REG_PPC_LPCR:
+		*val = get_reg_val(id, vcpu->arch.vcore->lpcr);
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -909,6 +927,9 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 		vcpu->arch.vcore->tb_offset =
 			ALIGN(set_reg_val(id, *val), 1UL << 24);
 		break;
+	case KVM_REG_PPC_LPCR:
+		kvmppc_set_lpcr(vcpu, set_reg_val(id, *val));
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -969,6 +990,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 			spin_lock_init(&vcore->lock);
 			init_waitqueue_head(&vcore->wq);
 			vcore->preempt_tb = TB_NIL;
+			vcore->lpcr = kvm->arch.lpcr;
 		}
 		kvm->arch.vcores[core] = vcore;
 		kvm->arch.online_vcores++;
@@ -1758,6 +1780,32 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
 	}
 }
 
+/*
+ * Update LPCR values in kvm->arch and in vcores.
+ * Caller must hold kvm->lock.
+ */
+void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
+{
+	long int i;
+	u32 cores_done = 0;
+
+	if ((kvm->arch.lpcr & mask) == lpcr)
+		return;
+
+	kvm->arch.lpcr = (kvm->arch.lpcr & ~mask) | lpcr;
+
+	for (i = 0; i < KVM_MAX_VCORES; ++i) {
+		struct kvmppc_vcore *vc = kvm->arch.vcores[i];
+		if (!vc)
+			continue;
+		spin_lock(&vc->lock);
+		vc->lpcr = (vc->lpcr & ~mask) | lpcr;
+		spin_unlock(&vc->lock);
+		if (++cores_done >= kvm->arch.online_vcores)
+			break;
+	}
+}
+
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 {
 	int err = 0;
@@ -1766,7 +1814,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 	unsigned long hva;
 	struct kvm_memory_slot *memslot;
 	struct vm_area_struct *vma;
-	unsigned long lpcr, senc;
+	unsigned long lpcr = 0, senc;
+	unsigned long lpcr_mask = 0;
 	unsigned long psize, porder;
 	unsigned long rma_size;
 	unsigned long rmls;
@@ -1831,9 +1880,9 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 		senc = slb_pgsize_encoding(psize);
 		kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
 			(VRMA_VSID << SLB_VSID_SHIFT_1T);
-		lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
-		lpcr |= senc << (LPCR_VRMASD_SH - 4);
-		kvm->arch.lpcr = lpcr;
+		lpcr_mask = LPCR_VRMASD;
+		/* the -4 is to account for senc values starting at 0x10 */
+		lpcr = senc << (LPCR_VRMASD_SH - 4);
 
 		/* Create HPTEs in the hash page table for the VRMA */
 		kvmppc_map_vrma(vcpu, memslot, porder);
@@ -1854,23 +1903,21 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 		kvm->arch.rma = ri;
 
 		/* Update LPCR and RMOR */
-		lpcr = kvm->arch.lpcr;
 		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
 			/* PPC970; insert RMLS value (split field) in HID4 */
-			lpcr &= ~((1ul << HID4_RMLS0_SH) |
-				  (3ul << HID4_RMLS2_SH));
-			lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) |
+			lpcr_mask = (1ul << HID4_RMLS0_SH) |
+				(3ul << HID4_RMLS2_SH) | HID4_RMOR;
+			lpcr = ((rmls >> 2) << HID4_RMLS0_SH) |
 				((rmls & 3) << HID4_RMLS2_SH);
 			/* RMOR is also in HID4 */
 			lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
 				<< HID4_RMOR_SH;
 		} else {
 			/* POWER7 */
-			lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
-			lpcr |= rmls << LPCR_RMLS_SH;
+			lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS;
+			lpcr = rmls << LPCR_RMLS_SH;
 			kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
 		}
-		kvm->arch.lpcr = lpcr;
 		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
 			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
 
@@ -1889,6 +1936,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 		}
 	}
 
+	kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+
 	/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
 	smp_wmb();
 	kvm->arch.rma_setup_done = 1;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 0effcd144241..295fd58af39a 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -509,7 +509,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	beq	20b
 
 	/* Set LPCR and RMOR. */
-10:	ld	r8,KVM_LPCR(r9)
+10:	ld	r8,VCORE_LPCR(r5)
 	mtspr	SPRN_LPCR,r8
 	ld	r8,KVM_RMOR(r9)
 	mtspr	SPRN_RMOR,r8
@@ -571,7 +571,8 @@ toc_tlbie_lock:
 	bne	24b
 	isync
 
-	ld	r7,KVM_LPCR(r9)		/* use kvm->arch.lpcr to store HID4 */
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	ld	r7,VCORE_LPCR(r5)	/* use vcore->lpcr to store HID4 */
 	li	r0,0x18f
 	rotldi	r0,r0,HID4_LPID5_SH	/* all lpid bits in HID4 = 1 */
 	or	r0,r7,r0
-- 
cgit v1.2.3


From 4b8473c9c19dff1b0c672f182cc50b9952cf42e7 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 20 Sep 2013 14:52:39 +1000
Subject: KVM: PPC: Book3S HV: Add support for guest Program Priority Register

POWER7 and later IBM server processors have a register called the
Program Priority Register (PPR), which controls the priority of
each hardware CPU SMT thread, and affects how fast it runs compared
to other SMT threads.  This priority can be controlled by writing to
the PPR or by use of a set of instructions of the form or rN,rN,rN
which are otherwise no-ops but have been defined to set the priority
to particular levels.

This adds code to context switch the PPR when entering and exiting
guests and to make the PPR value accessible through the SET/GET_ONE_REG
interface.  When entering the guest, we set the PPR as late as
possible, because if we are setting a low thread priority it will
make the code run slowly from that point on.  Similarly, the
first-level interrupt handlers save the PPR value in the PACA very
early on, and set the thread priority to the medium level, so that
the interrupt handling code runs at a reasonable speed.

Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt         |  1 +
 arch/powerpc/include/asm/exception-64s.h  |  8 ++++++++
 arch/powerpc/include/asm/kvm_book3s_asm.h |  1 +
 arch/powerpc/include/asm/kvm_host.h       |  1 +
 arch/powerpc/include/uapi/asm/kvm.h       |  1 +
 arch/powerpc/kernel/asm-offsets.c         |  2 ++
 arch/powerpc/kvm/book3s_hv.c              |  6 ++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 12 +++++++++++-
 8 files changed, 31 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/include')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 387f4c7dad9f..a9d1072dcbec 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1836,6 +1836,7 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_ACOP	| 64
   PPC   | KVM_REG_PPC_VRSAVE	| 32
   PPC   | KVM_REG_PPC_LPCR	| 64
+  PPC   | KVM_REG_PPC_PPR	| 64
   PPC   | KVM_REG_PPC_TM_GPR0	| 64
           ...
   PPC   | KVM_REG_PPC_TM_GPR31	| 64
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index cca12f084842..402c1c466509 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -204,6 +204,10 @@ do_kvm_##n:								\
 	ld	r10,area+EX_CFAR(r13);					\
 	std	r10,HSTATE_CFAR(r13);					\
 	END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947);		\
+	BEGIN_FTR_SECTION_NESTED(948)					\
+	ld	r10,area+EX_PPR(r13);					\
+	std	r10,HSTATE_PPR(r13);					\
+	END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948);	\
 	ld	r10,area+EX_R10(r13);					\
 	stw	r9,HSTATE_SCRATCH1(r13);				\
 	ld	r9,area+EX_R9(r13);					\
@@ -217,6 +221,10 @@ do_kvm_##n:								\
 	ld	r10,area+EX_R10(r13);					\
 	beq	89f;							\
 	stw	r9,HSTATE_SCRATCH1(r13);			\
+	BEGIN_FTR_SECTION_NESTED(948)					\
+	ld	r9,area+EX_PPR(r13);					\
+	std	r9,HSTATE_PPR(r13);					\
+	END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948);	\
 	ld	r9,area+EX_R9(r13);					\
 	std	r12,HSTATE_SCRATCH0(r13);			\
 	li	r12,n;							\
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 9039d3c97eec..22f46061ae84 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -101,6 +101,7 @@ struct kvmppc_host_state {
 #endif
 #ifdef CONFIG_PPC_BOOK3S_64
 	u64 cfar;
+	u64 ppr;
 #endif
 };
 
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 6eabffcb1c3c..4934e13fb23c 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -460,6 +460,7 @@ struct kvm_vcpu_arch {
 	u32 ctrl;
 	ulong dabr;
 	ulong cfar;
+	ulong ppr;
 #endif
 	u32 vrsave; /* also USPRG0 */
 	u32 mmucr;
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index e42127d1ae8e..fab6bc1f8e90 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -534,6 +534,7 @@ struct kvm_get_htab_header {
 
 #define KVM_REG_PPC_VRSAVE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4)
 #define KVM_REG_PPC_LPCR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5)
+#define KVM_REG_PPC_PPR		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb6)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index fd7513f8014b..5fda4ef489ad 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -519,6 +519,7 @@ int main(void)
 	DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
 	DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid));
 	DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
+	DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr));
 	DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
 	DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
 	DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
@@ -604,6 +605,7 @@ int main(void)
 
 #ifdef CONFIG_PPC_BOOK3S_64
 	HSTATE_FIELD(HSTATE_CFAR, cfar);
+	HSTATE_FIELD(HSTATE_PPR, ppr);
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 #else /* CONFIG_PPC_BOOK3S */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 36eb95cc48ae..2a0e38feec1d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -823,6 +823,9 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 	case KVM_REG_PPC_LPCR:
 		*val = get_reg_val(id, vcpu->arch.vcore->lpcr);
 		break;
+	case KVM_REG_PPC_PPR:
+		*val = get_reg_val(id, vcpu->arch.ppr);
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -930,6 +933,9 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 	case KVM_REG_PPC_LPCR:
 		kvmppc_set_lpcr(vcpu, set_reg_val(id, *val));
 		break;
+	case KVM_REG_PPC_PPR:
+		vcpu->arch.ppr = set_reg_val(id, *val);
+		break;
 	default:
 		r = -EINVAL;
 		break;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 295fd58af39a..a81979becf41 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -717,13 +717,15 @@ BEGIN_FTR_SECTION
 	ld	r5, VCPU_CFAR(r4)
 	mtspr	SPRN_CFAR, r5
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+	ld	r0, VCPU_PPR(r4)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
 	ld	r5, VCPU_LR(r4)
 	lwz	r6, VCPU_CR(r4)
 	mtlr	r5
 	mtcr	r6
 
-	ld	r0, VCPU_GPR(R0)(r4)
 	ld	r1, VCPU_GPR(R1)(r4)
 	ld	r2, VCPU_GPR(R2)(r4)
 	ld	r3, VCPU_GPR(R3)(r4)
@@ -737,6 +739,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	ld	r12, VCPU_GPR(R12)(r4)
 	ld	r13, VCPU_GPR(R13)(r4)
 
+BEGIN_FTR_SECTION
+	mtspr	SPRN_PPR, r0
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+	ld	r0, VCPU_GPR(R0)(r4)
 	ld	r4, VCPU_GPR(R4)(r4)
 
 	hrfid
@@ -787,6 +793,10 @@ BEGIN_FTR_SECTION
 	ld	r3, HSTATE_CFAR(r13)
 	std	r3, VCPU_CFAR(r9)
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+	ld	r4, HSTATE_PPR(r13)
+	std	r4, VCPU_PPR(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
 	/* Restore R1/R2 so we can handle faults */
 	ld	r1, HSTATE_HOST_R1(r13)
-- 
cgit v1.2.3


From 388cc6e133132e6c9b64e7d5361114a3a7d57663 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Sat, 21 Sep 2013 14:35:02 +1000
Subject: KVM: PPC: Book3S HV: Support POWER6 compatibility mode on POWER7

This enables us to use the Processor Compatibility Register (PCR) on
POWER7 to put the processor into architecture 2.05 compatibility mode
when running a guest.  In this mode the new instructions and registers
that were introduced on POWER7 are disabled in user mode.  This
includes all the VSX facilities plus several other instructions such
as ldbrx, stdbrx, popcntw, popcntd, etc.

To select this mode, we have a new register accessible through the
set/get_one_reg interface, called KVM_REG_PPC_ARCH_COMPAT.  Setting
this to zero gives the full set of capabilities of the processor.
Setting it to one of the "logical" PVR values defined in PAPR puts
the vcpu into the compatibility mode for the corresponding
architecture level.  The supported values are:

0x0f000002	Architecture 2.05 (POWER6)
0x0f000003	Architecture 2.06 (POWER7)
0x0f100003	Architecture 2.06+ (POWER7+)

Since the PCR is per-core, the architecture compatibility level and
the corresponding PCR value are stored in the struct kvmppc_vcore, and
are therefore shared between all vcpus in a virtual core.

Signed-off-by: Paul Mackerras <paulus@samba.org>
[agraf: squash in fix to add missing break statements and documentation]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt       |  1 +
 arch/powerpc/include/asm/kvm_host.h     |  2 ++
 arch/powerpc/include/asm/reg.h          | 11 +++++++++++
 arch/powerpc/include/uapi/asm/kvm.h     |  3 +++
 arch/powerpc/kernel/asm-offsets.c       |  1 +
 arch/powerpc/kvm/book3s_hv.c            | 35 +++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 16 +++++++++++++--
 7 files changed, 67 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index a9d1072dcbec..25a19576493f 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1837,6 +1837,7 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_VRSAVE	| 32
   PPC   | KVM_REG_PPC_LPCR	| 64
   PPC   | KVM_REG_PPC_PPR	| 64
+  PPC   | KVM_REG_PPC_ARCH_COMPAT	| 32
   PPC   | KVM_REG_PPC_TM_GPR0	| 64
           ...
   PPC   | KVM_REG_PPC_TM_GPR31	| 64
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 4934e13fb23c..b1e8f2ba2a9d 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -291,6 +291,8 @@ struct kvmppc_vcore {
 	struct kvm_vcpu *runner;
 	u64 tb_offset;		/* guest timebase - host timebase */
 	ulong lpcr;
+	u32 arch_compat;
+	ulong pcr;
 };
 
 #define VCORE_ENTRY_COUNT(vc)	((vc)->entry_exit_count & 0xff)
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 4bec4df3fb98..e294673e9d4b 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -314,6 +314,10 @@
 #define   LPID_RSVD	0x3ff		/* Reserved LPID for partn switching */
 #define	SPRN_HMER	0x150	/* Hardware m? error recovery */
 #define	SPRN_HMEER	0x151	/* Hardware m? enable error recovery */
+#define SPRN_PCR	0x152	/* Processor compatibility register */
+#define   PCR_VEC_DIS	(1ul << (63-0))	/* Vec. disable (bit NA since POWER8) */
+#define   PCR_VSX_DIS	(1ul << (63-1))	/* VSX disable (bit NA since POWER8) */
+#define   PCR_ARCH_205	0x2		/* Architecture 2.05 */
 #define	SPRN_HEIR	0x153	/* Hypervisor Emulated Instruction Register */
 #define SPRN_TLBINDEXR	0x154	/* P7 TLB control register */
 #define SPRN_TLBVPNR	0x155	/* P7 TLB control register */
@@ -1106,6 +1110,13 @@
 #define PVR_BE		0x0070
 #define PVR_PA6T	0x0090
 
+/* "Logical" PVR values defined in PAPR, representing architecture levels */
+#define PVR_ARCH_204	0x0f000001
+#define PVR_ARCH_205	0x0f000002
+#define PVR_ARCH_206	0x0f000003
+#define PVR_ARCH_206p	0x0f100003
+#define PVR_ARCH_207	0x0f000004
+
 /* Macros for setting and retrieving special purpose registers */
 #ifndef __ASSEMBLY__
 #define mfmsr()		({unsigned long rval; \
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index fab6bc1f8e90..e420d46d363f 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -536,6 +536,9 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_LPCR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5)
 #define KVM_REG_PPC_PPR		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb6)
 
+/* Architecture compatibility level */
+#define KVM_REG_PPC_ARCH_COMPAT	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb7)
+
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
  */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 5fda4ef489ad..5a285efba174 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -526,6 +526,7 @@ int main(void)
 	DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
 	DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset));
 	DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr));
+	DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr));
 	DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
 			   offsetof(struct kvmppc_vcpu_book3s, vcpu));
 	DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2a0e38feec1d..e42fb5448608 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -166,6 +166,35 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
 	vcpu->arch.pvr = pvr;
 }
 
+int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
+{
+	unsigned long pcr = 0;
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+	if (arch_compat) {
+		if (!cpu_has_feature(CPU_FTR_ARCH_206))
+			return -EINVAL;	/* 970 has no compat mode support */
+
+		switch (arch_compat) {
+		case PVR_ARCH_205:
+			pcr = PCR_ARCH_205;
+			break;
+		case PVR_ARCH_206:
+		case PVR_ARCH_206p:
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	spin_lock(&vc->lock);
+	vc->arch_compat = arch_compat;
+	vc->pcr = pcr;
+	spin_unlock(&vc->lock);
+
+	return 0;
+}
+
 void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
 {
 	int r;
@@ -826,6 +855,9 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 	case KVM_REG_PPC_PPR:
 		*val = get_reg_val(id, vcpu->arch.ppr);
 		break;
+	case KVM_REG_PPC_ARCH_COMPAT:
+		*val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -936,6 +968,9 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 	case KVM_REG_PPC_PPR:
 		vcpu->arch.ppr = set_reg_val(id, *val);
 		break;
+	case KVM_REG_PPC_ARCH_COMPAT:
+		r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
+		break;
 	default:
 		r = -EINVAL;
 		break;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index a81979becf41..cd39632a646e 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -499,7 +499,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	addis	r8,r8,0x100	/* if so, increment upper 40 bits */
 	mtspr	SPRN_TBU40,r8
 
-37:	li	r0,1
+	/* Load guest PCR value to select appropriate compat mode */
+37:	ld	r7, VCORE_PCR(r5)
+	cmpdi	r7, 0
+	beq	38f
+	mtspr	SPRN_PCR, r7
+38:
+	li	r0,1
 	stb	r0,VCORE_IN_GUEST(r5)	/* signal secondaries to continue */
 	b	10f
 
@@ -1094,8 +1100,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	addis	r8,r8,0x100		/* if so, increment upper 40 bits */
 	mtspr	SPRN_TBU40,r8
 
+	/* Reset PCR */
+17:	ld	r0, VCORE_PCR(r5)
+	cmpdi	r0, 0
+	beq	18f
+	li	r0, 0
+	mtspr	SPRN_PCR, r0
+18:
 	/* Signal secondary CPUs to continue */
-17:	li	r0,0
 	stb	r0,VCORE_IN_GUEST(r5)
 	lis	r8,0x7fff		/* MAX_INT@h */
 	mtspr	SPRN_HDEC,r8
-- 
cgit v1.2.3


From a2d56020d1d91934e7bb3e7c8a5a3b5921ce121b Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 20 Sep 2013 14:52:43 +1000
Subject: KVM: PPC: Book3S PR: Keep volatile reg values in vcpu rather than
 shadow_vcpu

Currently PR-style KVM keeps the volatile guest register values
(R0 - R13, CR, LR, CTR, XER, PC) in a shadow_vcpu struct rather than
the main kvm_vcpu struct.  For 64-bit, the shadow_vcpu exists in two
places, a kmalloc'd struct and in the PACA, and it gets copied back
and forth in kvmppc_core_vcpu_load/put(), because the real-mode code
can't rely on being able to access the kmalloc'd struct.

This changes the code to copy the volatile values into the shadow_vcpu
as one of the last things done before entering the guest.  Similarly
the values are copied back out of the shadow_vcpu to the kvm_vcpu
immediately after exiting the guest.  We arrange for interrupts to be
still disabled at this point so that we can't get preempted on 64-bit
and end up copying values from the wrong PACA.

This means that the accessor functions in kvm_book3s.h for these
registers are greatly simplified, and are the same between PR and HV KVM.
In places where accesses to shadow_vcpu fields are now replaced by
accesses to the kvm_vcpu, we can also remove the svcpu_get/put pairs.
Finally, on 64-bit, we don't need the kmalloc'd struct at all any more.

With this, the time to read the PVR one million times in a loop went
from 567.7ms to 575.5ms (averages of 6 values), an increase of about
1.4% for this worst-case test for guest entries and exits.  The
standard deviation of the measurements is about 11ms, so the
difference is only marginally significant statistically.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s.h     | 220 +++++-------------------------
 arch/powerpc/include/asm/kvm_book3s_asm.h |   6 +-
 arch/powerpc/include/asm/kvm_host.h       |   1 +
 arch/powerpc/kernel/asm-offsets.c         |   4 +-
 arch/powerpc/kvm/book3s_emulate.c         |   8 +-
 arch/powerpc/kvm/book3s_interrupts.S      |  27 +++-
 arch/powerpc/kvm/book3s_pr.c              | 122 ++++++++++++-----
 arch/powerpc/kvm/book3s_rmhandlers.S      |   6 +-
 arch/powerpc/kvm/trace.h                  |   7 +-
 9 files changed, 162 insertions(+), 239 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 14a47416bdd4..40f22d9c704c 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -200,140 +200,76 @@ extern void kvm_return_point(void);
 #include <asm/kvm_book3s_64.h>
 #endif
 
-#ifdef CONFIG_KVM_BOOK3S_PR
-
-static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
-{
-	return to_book3s(vcpu)->hior;
-}
-
-static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
-			unsigned long pending_now, unsigned long old_pending)
-{
-	if (pending_now)
-		vcpu->arch.shared->int_pending = 1;
-	else if (old_pending)
-		vcpu->arch.shared->int_pending = 0;
-}
-
 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
 {
-	if ( num < 14 ) {
-		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-		svcpu->gpr[num] = val;
-		svcpu_put(svcpu);
-		to_book3s(vcpu)->shadow_vcpu->gpr[num] = val;
-	} else
-		vcpu->arch.gpr[num] = val;
+	vcpu->arch.gpr[num] = val;
 }
 
 static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
 {
-	if ( num < 14 ) {
-		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-		ulong r = svcpu->gpr[num];
-		svcpu_put(svcpu);
-		return r;
-	} else
-		return vcpu->arch.gpr[num];
+	return vcpu->arch.gpr[num];
 }
 
 static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	svcpu->cr = val;
-	svcpu_put(svcpu);
-	to_book3s(vcpu)->shadow_vcpu->cr = val;
+	vcpu->arch.cr = val;
 }
 
 static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	u32 r;
-	r = svcpu->cr;
-	svcpu_put(svcpu);
-	return r;
+	return vcpu->arch.cr;
 }
 
 static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	svcpu->xer = val;
-	to_book3s(vcpu)->shadow_vcpu->xer = val;
-	svcpu_put(svcpu);
+	vcpu->arch.xer = val;
 }
 
 static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	u32 r;
-	r = svcpu->xer;
-	svcpu_put(svcpu);
-	return r;
+	return vcpu->arch.xer;
 }
 
 static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	svcpu->ctr = val;
-	svcpu_put(svcpu);
+	vcpu->arch.ctr = val;
 }
 
 static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	ulong r;
-	r = svcpu->ctr;
-	svcpu_put(svcpu);
-	return r;
+	return vcpu->arch.ctr;
 }
 
 static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	svcpu->lr = val;
-	svcpu_put(svcpu);
+	vcpu->arch.lr = val;
 }
 
 static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	ulong r;
-	r = svcpu->lr;
-	svcpu_put(svcpu);
-	return r;
+	return vcpu->arch.lr;
 }
 
 static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	svcpu->pc = val;
-	svcpu_put(svcpu);
+	vcpu->arch.pc = val;
 }
 
 static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	ulong r;
-	r = svcpu->pc;
-	svcpu_put(svcpu);
-	return r;
+	return vcpu->arch.pc;
 }
 
 static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
 {
 	ulong pc = kvmppc_get_pc(vcpu);
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	u32 r;
 
 	/* Load the instruction manually if it failed to do so in the
 	 * exit path */
-	if (svcpu->last_inst == KVM_INST_FETCH_FAILED)
-		kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false);
+	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
+		kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);
 
-	r = svcpu->last_inst;
-	svcpu_put(svcpu);
-	return r;
+	return vcpu->arch.last_inst;
 }
 
 /*
@@ -344,26 +280,34 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
 static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu)
 {
 	ulong pc = kvmppc_get_pc(vcpu) - 4;
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	u32 r;
 
 	/* Load the instruction manually if it failed to do so in the
 	 * exit path */
-	if (svcpu->last_inst == KVM_INST_FETCH_FAILED)
-		kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false);
+	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
+		kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);
 
-	r = svcpu->last_inst;
-	svcpu_put(svcpu);
-	return r;
+	return vcpu->arch.last_inst;
 }
 
 static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	ulong r;
-	r = svcpu->fault_dar;
-	svcpu_put(svcpu);
-	return r;
+	return vcpu->arch.fault_dar;
+}
+
+#ifdef CONFIG_KVM_BOOK3S_PR
+
+static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
+{
+	return to_book3s(vcpu)->hior;
+}
+
+static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
+			unsigned long pending_now, unsigned long old_pending)
+{
+	if (pending_now)
+		vcpu->arch.shared->int_pending = 1;
+	else if (old_pending)
+		vcpu->arch.shared->int_pending = 0;
 }
 
 static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
@@ -397,100 +341,6 @@ static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
 {
 }
 
-static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
-{
-	vcpu->arch.gpr[num] = val;
-}
-
-static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
-{
-	return vcpu->arch.gpr[num];
-}
-
-static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
-{
-	vcpu->arch.cr = val;
-}
-
-static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.cr;
-}
-
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
-{
-	vcpu->arch.xer = val;
-}
-
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.xer;
-}
-
-static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
-{
-	vcpu->arch.ctr = val;
-}
-
-static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.ctr;
-}
-
-static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
-{
-	vcpu->arch.lr = val;
-}
-
-static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.lr;
-}
-
-static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
-{
-	vcpu->arch.pc = val;
-}
-
-static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.pc;
-}
-
-static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
-{
-	ulong pc = kvmppc_get_pc(vcpu);
-
-	/* Load the instruction manually if it failed to do so in the
-	 * exit path */
-	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
-		kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);
-
-	return vcpu->arch.last_inst;
-}
-
-/*
- * Like kvmppc_get_last_inst(), but for fetching a sc instruction.
- * Because the sc instruction sets SRR0 to point to the following
- * instruction, we have to fetch from pc - 4.
- */
-static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu)
-{
-	ulong pc = kvmppc_get_pc(vcpu) - 4;
-
-	/* Load the instruction manually if it failed to do so in the
-	 * exit path */
-	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
-		kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);
-
-	return vcpu->arch.last_inst;
-}
-
-static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.fault_dar;
-}
-
 static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
 {
 	return false;
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 22f46061ae84..62737113c2b9 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -109,14 +109,14 @@ struct kvmppc_book3s_shadow_vcpu {
 	ulong gpr[14];
 	u32 cr;
 	u32 xer;
-
-	u32 fault_dsisr;
-	u32 last_inst;
 	ulong ctr;
 	ulong lr;
 	ulong pc;
+
 	ulong shadow_srr1;
 	ulong fault_dar;
+	u32 fault_dsisr;
+	u32 last_inst;
 
 #ifdef CONFIG_PPC_BOOK3S_32
 	u32     sr[16];			/* Guest SRs */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index b1e8f2ba2a9d..f48f3f09177f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -463,6 +463,7 @@ struct kvm_vcpu_arch {
 	ulong dabr;
 	ulong cfar;
 	ulong ppr;
+	ulong shadow_srr1;
 #endif
 	u32 vrsave; /* also USPRG0 */
 	u32 mmucr;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 5a285efba174..fda7f4020a33 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -520,6 +520,7 @@ int main(void)
 	DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid));
 	DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
 	DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr));
+	DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
 	DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
 	DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
 	DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
@@ -527,14 +528,13 @@ int main(void)
 	DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset));
 	DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr));
 	DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr));
-	DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
-			   offsetof(struct kvmppc_vcpu_book3s, vcpu));
 	DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
 	DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv));
 	DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
 
 #ifdef CONFIG_PPC_BOOK3S_64
 #ifdef CONFIG_KVM_BOOK3S_PR
+	DEFINE(PACA_SVCPU, offsetof(struct paca_struct, shadow_vcpu));
 # define SVCPU_FIELD(x, f)	DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f))
 #else
 # define SVCPU_FIELD(x, f)
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 360ce68c9809..34044b111daa 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -267,12 +267,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 			r = kvmppc_st(vcpu, &addr, 32, zeros, true);
 			if ((r == -ENOENT) || (r == -EPERM)) {
-				struct kvmppc_book3s_shadow_vcpu *svcpu;
-
-				svcpu = svcpu_get(vcpu);
 				*advance = 0;
 				vcpu->arch.shared->dar = vaddr;
-				svcpu->fault_dar = vaddr;
+				vcpu->arch.fault_dar = vaddr;
 
 				dsisr = DSISR_ISSTORE;
 				if (r == -ENOENT)
@@ -281,8 +278,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 					dsisr |= DSISR_PROTFAULT;
 
 				vcpu->arch.shared->dsisr = dsisr;
-				svcpu->fault_dsisr = dsisr;
-				svcpu_put(svcpu);
+				vcpu->arch.fault_dsisr = dsisr;
 
 				kvmppc_book3s_queue_irqprio(vcpu,
 					BOOK3S_INTERRUPT_DATA_STORAGE);
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 17cfae5497a3..d4e30d8fa825 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -26,8 +26,12 @@
 
 #if defined(CONFIG_PPC_BOOK3S_64)
 #define FUNC(name) 		GLUE(.,name)
+#define GET_SHADOW_VCPU(reg)    addi	reg, r13, PACA_SVCPU
+
 #elif defined(CONFIG_PPC_BOOK3S_32)
 #define FUNC(name)		name
+#define GET_SHADOW_VCPU(reg)	lwz     reg, (THREAD + THREAD_KVM_SVCPU)(r2)
+
 #endif /* CONFIG_PPC_BOOK3S_XX */
 
 #define VCPU_LOAD_NVGPRS(vcpu) \
@@ -87,8 +91,14 @@ kvm_start_entry:
 	VCPU_LOAD_NVGPRS(r4)
 
 kvm_start_lightweight:
+	/* Copy registers into shadow vcpu so we can access them in real mode */
+	GET_SHADOW_VCPU(r3)
+	bl	FUNC(kvmppc_copy_to_svcpu)
+	nop
+	REST_GPR(4, r1)
 
 #ifdef CONFIG_PPC_BOOK3S_64
+	/* Get the dcbz32 flag */
 	PPC_LL	r3, VCPU_HFLAGS(r4)
 	rldicl	r3, r3, 0, 63		/* r3 &= 1 */
 	stb	r3, HSTATE_RESTORE_HID5(r13)
@@ -125,18 +135,31 @@ kvmppc_handler_highmem:
 	 *
 	 */
 
-	/* R7 = vcpu */
-	PPC_LL	r7, GPR4(r1)
+	/* Transfer reg values from shadow vcpu back to vcpu struct */
+	/* On 64-bit, interrupts are still off at this point */
+	PPC_LL	r3, GPR4(r1)		/* vcpu pointer */
+	GET_SHADOW_VCPU(r4)
+	bl	FUNC(kvmppc_copy_from_svcpu)
+	nop
 
 #ifdef CONFIG_PPC_BOOK3S_64
+	/* Re-enable interrupts */
+	ld	r3, HSTATE_HOST_MSR(r13)
+	ori	r3, r3, MSR_EE
+	MTMSR_EERI(r3)
+
 	/*
 	 * Reload kernel SPRG3 value.
 	 * No need to save guest value as usermode can't modify SPRG3.
 	 */
 	ld	r3, PACA_SPRG3(r13)
 	mtspr	SPRN_SPRG3, r3
+
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
+	/* R7 = vcpu */
+	PPC_LL	r7, GPR4(r1)
+
 	PPC_STL	r14, VCPU_GPR(R14)(r7)
 	PPC_STL	r15, VCPU_GPR(R15)(r7)
 	PPC_STL	r16, VCPU_GPR(R16)(r7)
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 8d45f185241a..228a9baffd9e 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -61,8 +61,6 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
 	memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
-	memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
-	       sizeof(get_paca()->shadow_vcpu));
 	svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
 	svcpu_put(svcpu);
 #endif
@@ -77,8 +75,6 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
 	memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
-	memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
-	       sizeof(get_paca()->shadow_vcpu));
 	to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
 	svcpu_put(svcpu);
 #endif
@@ -87,6 +83,60 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 	vcpu->cpu = -1;
 }
 
+/* Copy data needed by real-mode code from vcpu to shadow vcpu */
+void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
+			  struct kvm_vcpu *vcpu)
+{
+	svcpu->gpr[0] = vcpu->arch.gpr[0];
+	svcpu->gpr[1] = vcpu->arch.gpr[1];
+	svcpu->gpr[2] = vcpu->arch.gpr[2];
+	svcpu->gpr[3] = vcpu->arch.gpr[3];
+	svcpu->gpr[4] = vcpu->arch.gpr[4];
+	svcpu->gpr[5] = vcpu->arch.gpr[5];
+	svcpu->gpr[6] = vcpu->arch.gpr[6];
+	svcpu->gpr[7] = vcpu->arch.gpr[7];
+	svcpu->gpr[8] = vcpu->arch.gpr[8];
+	svcpu->gpr[9] = vcpu->arch.gpr[9];
+	svcpu->gpr[10] = vcpu->arch.gpr[10];
+	svcpu->gpr[11] = vcpu->arch.gpr[11];
+	svcpu->gpr[12] = vcpu->arch.gpr[12];
+	svcpu->gpr[13] = vcpu->arch.gpr[13];
+	svcpu->cr  = vcpu->arch.cr;
+	svcpu->xer = vcpu->arch.xer;
+	svcpu->ctr = vcpu->arch.ctr;
+	svcpu->lr  = vcpu->arch.lr;
+	svcpu->pc  = vcpu->arch.pc;
+}
+
+/* Copy data touched by real-mode code from shadow vcpu back to vcpu */
+void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
+			    struct kvmppc_book3s_shadow_vcpu *svcpu)
+{
+	vcpu->arch.gpr[0] = svcpu->gpr[0];
+	vcpu->arch.gpr[1] = svcpu->gpr[1];
+	vcpu->arch.gpr[2] = svcpu->gpr[2];
+	vcpu->arch.gpr[3] = svcpu->gpr[3];
+	vcpu->arch.gpr[4] = svcpu->gpr[4];
+	vcpu->arch.gpr[5] = svcpu->gpr[5];
+	vcpu->arch.gpr[6] = svcpu->gpr[6];
+	vcpu->arch.gpr[7] = svcpu->gpr[7];
+	vcpu->arch.gpr[8] = svcpu->gpr[8];
+	vcpu->arch.gpr[9] = svcpu->gpr[9];
+	vcpu->arch.gpr[10] = svcpu->gpr[10];
+	vcpu->arch.gpr[11] = svcpu->gpr[11];
+	vcpu->arch.gpr[12] = svcpu->gpr[12];
+	vcpu->arch.gpr[13] = svcpu->gpr[13];
+	vcpu->arch.cr  = svcpu->cr;
+	vcpu->arch.xer = svcpu->xer;
+	vcpu->arch.ctr = svcpu->ctr;
+	vcpu->arch.lr  = svcpu->lr;
+	vcpu->arch.pc  = svcpu->pc;
+	vcpu->arch.shadow_srr1 = svcpu->shadow_srr1;
+	vcpu->arch.fault_dar   = svcpu->fault_dar;
+	vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
+	vcpu->arch.last_inst   = svcpu->last_inst;
+}
+
 int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 {
 	int r = 1; /* Indicate we want to get back into the guest */
@@ -388,22 +438,18 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	if (page_found == -ENOENT) {
 		/* Page not found in guest PTE entries */
-		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
 		vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
-		vcpu->arch.shared->dsisr = svcpu->fault_dsisr;
+		vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr;
 		vcpu->arch.shared->msr |=
-			(svcpu->shadow_srr1 & 0x00000000f8000000ULL);
-		svcpu_put(svcpu);
+			vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
 		kvmppc_book3s_queue_irqprio(vcpu, vec);
 	} else if (page_found == -EPERM) {
 		/* Storage protection */
-		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
 		vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
-		vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE;
+		vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
 		vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
 		vcpu->arch.shared->msr |=
-			svcpu->shadow_srr1 & 0x00000000f8000000ULL;
-		svcpu_put(svcpu);
+			vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
 		kvmppc_book3s_queue_irqprio(vcpu, vec);
 	} else if (page_found == -EINVAL) {
 		/* Page not found in guest SLB */
@@ -645,21 +691,26 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	switch (exit_nr) {
 	case BOOK3S_INTERRUPT_INST_STORAGE:
 	{
-		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-		ulong shadow_srr1 = svcpu->shadow_srr1;
+		ulong shadow_srr1 = vcpu->arch.shadow_srr1;
 		vcpu->stat.pf_instruc++;
 
 #ifdef CONFIG_PPC_BOOK3S_32
 		/* We set segments as unused segments when invalidating them. So
 		 * treat the respective fault as segment fault. */
-		if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) {
-			kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
-			r = RESUME_GUEST;
+		{
+			struct kvmppc_book3s_shadow_vcpu *svcpu;
+			u32 sr;
+
+			svcpu = svcpu_get(vcpu);
+			sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT];
 			svcpu_put(svcpu);
-			break;
+			if (sr == SR_INVALID) {
+				kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+				r = RESUME_GUEST;
+				break;
+			}
 		}
 #endif
-		svcpu_put(svcpu);
 
 		/* only care about PTEG not found errors, but leave NX alone */
 		if (shadow_srr1 & 0x40000000) {
@@ -684,21 +735,26 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	case BOOK3S_INTERRUPT_DATA_STORAGE:
 	{
 		ulong dar = kvmppc_get_fault_dar(vcpu);
-		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-		u32 fault_dsisr = svcpu->fault_dsisr;
+		u32 fault_dsisr = vcpu->arch.fault_dsisr;
 		vcpu->stat.pf_storage++;
 
 #ifdef CONFIG_PPC_BOOK3S_32
 		/* We set segments as unused segments when invalidating them. So
 		 * treat the respective fault as segment fault. */
-		if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) {
-			kvmppc_mmu_map_segment(vcpu, dar);
-			r = RESUME_GUEST;
+		{
+			struct kvmppc_book3s_shadow_vcpu *svcpu;
+			u32 sr;
+
+			svcpu = svcpu_get(vcpu);
+			sr = svcpu->sr[dar >> SID_SHIFT];
 			svcpu_put(svcpu);
-			break;
+			if (sr == SR_INVALID) {
+				kvmppc_mmu_map_segment(vcpu, dar);
+				r = RESUME_GUEST;
+				break;
+			}
 		}
 #endif
-		svcpu_put(svcpu);
 
 		/* The only case we need to handle is missing shadow PTEs */
 		if (fault_dsisr & DSISR_NOHPTE) {
@@ -745,13 +801,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
 	{
 		enum emulation_result er;
-		struct kvmppc_book3s_shadow_vcpu *svcpu;
 		ulong flags;
 
 program_interrupt:
-		svcpu = svcpu_get(vcpu);
-		flags = svcpu->shadow_srr1 & 0x1f0000ull;
-		svcpu_put(svcpu);
+		flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
 
 		if (vcpu->arch.shared->msr & MSR_PR) {
 #ifdef EXIT_DEBUG
@@ -883,9 +936,7 @@ program_interrupt:
 		break;
 	default:
 	{
-		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-		ulong shadow_srr1 = svcpu->shadow_srr1;
-		svcpu_put(svcpu);
+		ulong shadow_srr1 = vcpu->arch.shadow_srr1;
 		/* Ugh - bork here! What did we get? */
 		printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
 			exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
@@ -1060,11 +1111,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	if (!vcpu_book3s)
 		goto out;
 
+#ifdef CONFIG_KVM_BOOK3S_32
 	vcpu_book3s->shadow_vcpu =
 		kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
 	if (!vcpu_book3s->shadow_vcpu)
 		goto free_vcpu;
-
+#endif
 	vcpu = &vcpu_book3s->vcpu;
 	err = kvm_vcpu_init(vcpu, kvm, id);
 	if (err)
@@ -1098,8 +1150,10 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 uninit_vcpu:
 	kvm_vcpu_uninit(vcpu);
 free_shadow_vcpu:
+#ifdef CONFIG_KVM_BOOK3S_32
 	kfree(vcpu_book3s->shadow_vcpu);
 free_vcpu:
+#endif
 	vfree(vcpu_book3s);
 out:
 	return ERR_PTR(err);
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 8f7633e3afb8..cd59a3a38482 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -179,11 +179,15 @@ _GLOBAL(kvmppc_entry_trampoline)
 
 	li	r6, MSR_IR | MSR_DR
 	andc	r6, r5, r6	/* Clear DR and IR in MSR value */
+#ifdef CONFIG_PPC_BOOK3S_32
 	/*
 	 * Set EE in HOST_MSR so that it's enabled when we get into our
-	 * C exit handler function
+	 * C exit handler function.  On 64-bit we delay enabling
+	 * interrupts until we have finished transferring stuff
+	 * to or from the PACA.
 	 */
 	ori	r5, r5, MSR_EE
+#endif
 	mtsrr0	r7
 	mtsrr1	r6
 	RFI
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index e326489a5420..a088e9a8c103 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -101,17 +101,12 @@ TRACE_EVENT(kvm_exit,
 	),
 
 	TP_fast_assign(
-#ifdef CONFIG_KVM_BOOK3S_PR
-		struct kvmppc_book3s_shadow_vcpu *svcpu;
-#endif
 		__entry->exit_nr	= exit_nr;
 		__entry->pc		= kvmppc_get_pc(vcpu);
 		__entry->dar		= kvmppc_get_fault_dar(vcpu);
 		__entry->msr		= vcpu->arch.shared->msr;
 #ifdef CONFIG_KVM_BOOK3S_PR
-		svcpu = svcpu_get(vcpu);
-		__entry->srr1		= svcpu->shadow_srr1;
-		svcpu_put(svcpu);
+		__entry->srr1		= vcpu->arch.shadow_srr1;
 #endif
 		__entry->last_inst	= vcpu->arch.last_inst;
 	),
-- 
cgit v1.2.3


From a4a0f2524acc2c602cadd8e743be19d86f3a746b Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 20 Sep 2013 14:52:44 +1000
Subject: KVM: PPC: Book3S PR: Allow guest to use 64k pages

This adds the code to interpret 64k HPTEs in the guest hashed page
table (HPT), 64k SLB entries, and to tell the guest about 64k pages
in kvm_vm_ioctl_get_smmu_info().  Guest 64k pages are still shadowed
by 4k pages.

This also adds another hash table to the four we have already in
book3s_mmu_hpte.c to allow us to find all the PTEs that we have
instantiated that match a given 64k guest page.

The tlbie instruction changed starting with POWER6 to use a bit in
the RB operand to indicate large page invalidations, and to use other
RB bits to indicate the base and actual page sizes and the segment
size.  64k pages came in slightly earlier, with POWER5++.
We use one bit in vcpu->arch.hflags to indicate that the emulated
cpu supports 64k pages, and another to indicate that it has the new
tlbie definition.

The KVM_PPC_GET_SMMU_INFO ioctl presents a bit of a problem, because
the MMU capabilities depend on which CPU model we're emulating, but it
is a VM ioctl not a VCPU ioctl and therefore doesn't get passed a VCPU
fd.  In addition, commonly-used userspace (QEMU) calls it before
setting the PVR for any VCPU.  Therefore, as a best effort we look at
the first vcpu in the VM and return 64k pages or not depending on its
capabilities.  We also make the PVR default to the host PVR on recent
CPUs that support 1TB segments (and therefore multiple page sizes as
well) so that KVM_PPC_GET_SMMU_INFO will include 64k page and 1TB
segment support on those CPUs.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_asm.h    |  2 +
 arch/powerpc/include/asm/kvm_book3s.h |  6 +++
 arch/powerpc/include/asm/kvm_host.h   |  4 ++
 arch/powerpc/kvm/book3s_64_mmu.c      | 92 +++++++++++++++++++++++++++++++----
 arch/powerpc/kvm/book3s_mmu_hpte.c    | 50 +++++++++++++++++++
 arch/powerpc/kvm/book3s_pr.c          | 58 +++++++++++++++++++---
 6 files changed, 197 insertions(+), 15 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 851bac7afa4b..e2d4d467ee93 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -123,6 +123,8 @@
 #define BOOK3S_HFLAG_SLB			0x2
 #define BOOK3S_HFLAG_PAIRED_SINGLE		0x4
 #define BOOK3S_HFLAG_NATIVE_PS			0x8
+#define BOOK3S_HFLAG_MULTI_PGSIZE		0x10
+#define BOOK3S_HFLAG_NEW_TLBIE			0x20
 
 #define RESUME_FLAG_NV          (1<<0)  /* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST        (1<<1)  /* Resume host? */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 40f22d9c704c..1d4a1202e2d5 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -58,6 +58,9 @@ struct hpte_cache {
 	struct hlist_node list_pte_long;
 	struct hlist_node list_vpte;
 	struct hlist_node list_vpte_long;
+#ifdef CONFIG_PPC_BOOK3S_64
+	struct hlist_node list_vpte_64k;
+#endif
 	struct rcu_head rcu_head;
 	u64 host_vpn;
 	u64 pfn;
@@ -99,6 +102,9 @@ struct kvmppc_vcpu_book3s {
 	struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG];
 	struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE];
 	struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG];
+#ifdef CONFIG_PPC_BOOK3S_64
+	struct hlist_head hpte_hash_vpte_64k[HPTEG_HASH_NUM_VPTE_64K];
+#endif
 	int hpte_cache_count;
 	spinlock_t mmu_lock;
 };
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index f48f3f09177f..3d8b8a8921f0 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -68,10 +68,12 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 #define HPTEG_HASH_BITS_PTE_LONG	12
 #define HPTEG_HASH_BITS_VPTE		13
 #define HPTEG_HASH_BITS_VPTE_LONG	5
+#define HPTEG_HASH_BITS_VPTE_64K	11
 #define HPTEG_HASH_NUM_PTE		(1 << HPTEG_HASH_BITS_PTE)
 #define HPTEG_HASH_NUM_PTE_LONG		(1 << HPTEG_HASH_BITS_PTE_LONG)
 #define HPTEG_HASH_NUM_VPTE		(1 << HPTEG_HASH_BITS_VPTE)
 #define HPTEG_HASH_NUM_VPTE_LONG	(1 << HPTEG_HASH_BITS_VPTE_LONG)
+#define HPTEG_HASH_NUM_VPTE_64K		(1 << HPTEG_HASH_BITS_VPTE_64K)
 
 /* Physical Address Mask - allowed range of real mode RAM access */
 #define KVM_PAM			0x0fffffffffffffffULL
@@ -327,6 +329,7 @@ struct kvmppc_pte {
 	bool may_read		: 1;
 	bool may_write		: 1;
 	bool may_execute	: 1;
+	u8 page_size;		/* MMU_PAGE_xxx */
 };
 
 struct kvmppc_mmu {
@@ -359,6 +362,7 @@ struct kvmppc_slb {
 	bool large	: 1;	/* PTEs are 16MB */
 	bool tb		: 1;	/* 1TB segment */
 	bool class	: 1;
+	u8 base_page_size;	/* MMU_PAGE_xxx */
 };
 
 # ifdef CONFIG_PPC_FSL_BOOK3E
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 7e345e00661a..8277264a0bc5 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -107,9 +107,20 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
 	return kvmppc_slb_calc_vpn(slb, eaddr);
 }
 
+static int mmu_pagesize(int mmu_pg)
+{
+	switch (mmu_pg) {
+	case MMU_PAGE_64K:
+		return 16;
+	case MMU_PAGE_16M:
+		return 24;
+	}
+	return 12;
+}
+
 static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
 {
-	return slbe->large ? 24 : 12;
+	return mmu_pagesize(slbe->base_page_size);
 }
 
 static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
@@ -166,14 +177,34 @@ static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr)
 	avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
 	avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p);
 
-	if (p < 24)
-		avpn >>= ((80 - p) - 56) - 8;
+	if (p < 16)
+		avpn >>= ((80 - p) - 56) - 8;	/* 16 - p */
 	else
-		avpn <<= 8;
+		avpn <<= p - 16;
 
 	return avpn;
 }
 
+/*
+ * Return page size encoded in the second word of a HPTE, or
+ * -1 for an invalid encoding for the base page size indicated by
+ * the SLB entry.  This doesn't handle mixed pagesize segments yet.
+ */
+static int decode_pagesize(struct kvmppc_slb *slbe, u64 r)
+{
+	switch (slbe->base_page_size) {
+	case MMU_PAGE_64K:
+		if ((r & 0xf000) == 0x1000)
+			return MMU_PAGE_64K;
+		break;
+	case MMU_PAGE_16M:
+		if ((r & 0xff000) == 0)
+			return MMU_PAGE_16M;
+		break;
+	}
+	return -1;
+}
+
 static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 				struct kvmppc_pte *gpte, bool data)
 {
@@ -189,6 +220,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	u8 pp, key = 0;
 	bool found = false;
 	bool second = false;
+	int pgsize;
 	ulong mp_ea = vcpu->arch.magic_page_ea;
 
 	/* Magic page override */
@@ -202,6 +234,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 		gpte->may_execute = true;
 		gpte->may_read = true;
 		gpte->may_write = true;
+		gpte->page_size = MMU_PAGE_4K;
 
 		return 0;
 	}
@@ -222,6 +255,8 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID |
 		HPTE_V_SECONDARY;
 
+	pgsize = slbe->large ? MMU_PAGE_16M : MMU_PAGE_4K;
+
 do_second:
 	ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
 	if (kvm_is_error_hva(ptegp))
@@ -240,6 +275,13 @@ do_second:
 	for (i=0; i<16; i+=2) {
 		/* Check all relevant fields of 1st dword */
 		if ((pteg[i] & v_mask) == v_val) {
+			/* If large page bit is set, check pgsize encoding */
+			if (slbe->large &&
+			    (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
+				pgsize = decode_pagesize(slbe, pteg[i+1]);
+				if (pgsize < 0)
+					continue;
+			}
 			found = true;
 			break;
 		}
@@ -256,13 +298,13 @@ do_second:
 	v = pteg[i];
 	r = pteg[i+1];
 	pp = (r & HPTE_R_PP) | key;
-	eaddr_mask = 0xFFF;
 
 	gpte->eaddr = eaddr;
 	gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
-	if (slbe->large)
-		eaddr_mask = 0xFFFFFF;
+
+	eaddr_mask = (1ull << mmu_pagesize(pgsize)) - 1;
 	gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
+	gpte->page_size = pgsize;
 	gpte->may_execute = ((r & HPTE_R_N) ? false : true);
 	gpte->may_read = false;
 	gpte->may_write = false;
@@ -345,6 +387,21 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
 	slbe->nx    = (rs & SLB_VSID_N) ? 1 : 0;
 	slbe->class = (rs & SLB_VSID_C) ? 1 : 0;
 
+	slbe->base_page_size = MMU_PAGE_4K;
+	if (slbe->large) {
+		if (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE) {
+			switch (rs & SLB_VSID_LP) {
+			case SLB_VSID_LP_00:
+				slbe->base_page_size = MMU_PAGE_16M;
+				break;
+			case SLB_VSID_LP_01:
+				slbe->base_page_size = MMU_PAGE_64K;
+				break;
+			}
+		} else
+			slbe->base_page_size = MMU_PAGE_16M;
+	}
+
 	slbe->orige = rb & (ESID_MASK | SLB_ESID_V);
 	slbe->origv = rs;
 
@@ -463,8 +520,25 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
 
 	dprintk("KVM MMU: tlbie(0x%lx)\n", va);
 
-	if (large)
-		mask = 0xFFFFFF000ULL;
+	/*
+	 * The tlbie instruction changed behaviour starting with
+	 * POWER6.  POWER6 and later don't have the large page flag
+	 * in the instruction but in the RB value, along with bits
+	 * indicating page and segment sizes.
+	 */
+	if (vcpu->arch.hflags & BOOK3S_HFLAG_NEW_TLBIE) {
+		/* POWER6 or later */
+		if (va & 1) {		/* L bit */
+			if ((va & 0xf000) == 0x1000)
+				mask = 0xFFFFFFFF0ULL;	/* 64k page */
+			else
+				mask = 0xFFFFFF000ULL;	/* 16M page */
+		}
+	} else {
+		/* older processors, e.g. PPC970 */
+		if (large)
+			mask = 0xFFFFFF000ULL;
+	}
 	kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask);
 }
 
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index da8b13c4b776..d2d280b16778 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -56,6 +56,14 @@ static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage)
 		       HPTEG_HASH_BITS_VPTE_LONG);
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline u64 kvmppc_mmu_hash_vpte_64k(u64 vpage)
+{
+	return hash_64((vpage & 0xffffffff0ULL) >> 4,
+		       HPTEG_HASH_BITS_VPTE_64K);
+}
+#endif
+
 void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 {
 	u64 index;
@@ -83,6 +91,13 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 	hlist_add_head_rcu(&pte->list_vpte_long,
 			   &vcpu3s->hpte_hash_vpte_long[index]);
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* Add to vPTE_64k list */
+	index = kvmppc_mmu_hash_vpte_64k(pte->pte.vpage);
+	hlist_add_head_rcu(&pte->list_vpte_64k,
+			   &vcpu3s->hpte_hash_vpte_64k[index]);
+#endif
+
 	spin_unlock(&vcpu3s->mmu_lock);
 }
 
@@ -113,6 +128,9 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 	hlist_del_init_rcu(&pte->list_pte_long);
 	hlist_del_init_rcu(&pte->list_vpte);
 	hlist_del_init_rcu(&pte->list_vpte_long);
+#ifdef CONFIG_PPC_BOOK3S_64
+	hlist_del_init_rcu(&pte->list_vpte_64k);
+#endif
 
 	spin_unlock(&vcpu3s->mmu_lock);
 
@@ -219,6 +237,29 @@ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
 	rcu_read_unlock();
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Flush with mask 0xffffffff0 */
+static void kvmppc_mmu_pte_vflush_64k(struct kvm_vcpu *vcpu, u64 guest_vp)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hlist_head *list;
+	struct hpte_cache *pte;
+	u64 vp_mask = 0xffffffff0ULL;
+
+	list = &vcpu3s->hpte_hash_vpte_64k[
+		kvmppc_mmu_hash_vpte_64k(guest_vp)];
+
+	rcu_read_lock();
+
+	/* Check the list for matching entries and invalidate */
+	hlist_for_each_entry_rcu(pte, list, list_vpte_64k)
+		if ((pte->pte.vpage & vp_mask) == guest_vp)
+			invalidate_pte(vcpu, pte);
+
+	rcu_read_unlock();
+}
+#endif
+
 /* Flush with mask 0xffffff000 */
 static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
 {
@@ -249,6 +290,11 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
 	case 0xfffffffffULL:
 		kvmppc_mmu_pte_vflush_short(vcpu, guest_vp);
 		break;
+#ifdef CONFIG_PPC_BOOK3S_64
+	case 0xffffffff0ULL:
+		kvmppc_mmu_pte_vflush_64k(vcpu, guest_vp);
+		break;
+#endif
 	case 0xffffff000ULL:
 		kvmppc_mmu_pte_vflush_long(vcpu, guest_vp);
 		break;
@@ -320,6 +366,10 @@ int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu)
 				  ARRAY_SIZE(vcpu3s->hpte_hash_vpte));
 	kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long,
 				  ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long));
+#ifdef CONFIG_PPC_BOOK3S_64
+	kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_64k,
+				  ARRAY_SIZE(vcpu3s->hpte_hash_vpte_64k));
+#endif
 
 	spin_lock_init(&vcpu3s->mmu_lock);
 
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 228a9baffd9e..6cc99583ed39 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -306,6 +306,23 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
 	if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
 		to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
 
+	/*
+	 * If they're asking for POWER6 or later, set the flag
+	 * indicating that we can do multiple large page sizes
+	 * and 1TB segments.
+	 * Also set the flag that indicates that tlbie has the large
+	 * page bit in the RB operand instead of the instruction.
+	 */
+	switch (PVR_VER(pvr)) {
+	case PVR_POWER6:
+	case PVR_POWER7:
+	case PVR_POWER7p:
+	case PVR_POWER8:
+		vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
+			BOOK3S_HFLAG_NEW_TLBIE;
+		break;
+	}
+
 #ifdef CONFIG_PPC_BOOK3S_32
 	/* 32 bit Book3S always has 32 byte dcbz */
 	vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
@@ -1130,8 +1147,14 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
 
 #ifdef CONFIG_PPC_BOOK3S_64
-	/* default to book3s_64 (970fx) */
+	/*
+	 * Default to the same as the host if we're on sufficiently
+	 * recent machine that we have 1TB segments;
+	 * otherwise default to PPC970FX.
+	 */
 	vcpu->arch.pvr = 0x3C0301;
+	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+		vcpu->arch.pvr = mfspr(SPRN_PVR);
 #else
 	/* default to book3s_32 (750) */
 	vcpu->arch.pvr = 0x84202;
@@ -1317,7 +1340,10 @@ out:
 #ifdef CONFIG_PPC64
 int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
 {
-	info->flags = KVM_PPC_1T_SEGMENTS;
+	long int i;
+	struct kvm_vcpu *vcpu;
+
+	info->flags = 0;
 
 	/* SLB is always 64 entries */
 	info->slb_size = 64;
@@ -1328,11 +1354,31 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
 	info->sps[0].enc[0].page_shift = 12;
 	info->sps[0].enc[0].pte_enc = 0;
 
+	/*
+	 * 64k large page size.
+	 * We only want to put this in if the CPUs we're emulating
+	 * support it, but unfortunately we don't have a vcpu easily
+	 * to hand here to test.  Just pick the first vcpu, and if
+	 * that doesn't exist yet, report the minimum capability,
+	 * i.e., no 64k pages.
+	 * 1T segment support goes along with 64k pages.
+	 */
+	i = 1;
+	vcpu = kvm_get_vcpu(kvm, 0);
+	if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
+		info->flags = KVM_PPC_1T_SEGMENTS;
+		info->sps[i].page_shift = 16;
+		info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01;
+		info->sps[i].enc[0].page_shift = 16;
+		info->sps[i].enc[0].pte_enc = 1;
+		++i;
+	}
+
 	/* Standard 16M large page size segment */
-	info->sps[1].page_shift = 24;
-	info->sps[1].slb_enc = SLB_VSID_L;
-	info->sps[1].enc[0].page_shift = 24;
-	info->sps[1].enc[0].pte_enc = 0;
+	info->sps[i].page_shift = 24;
+	info->sps[i].slb_enc = SLB_VSID_L;
+	info->sps[i].enc[0].page_shift = 24;
+	info->sps[i].enc[0].pte_enc = 0;
 
 	return 0;
 }
-- 
cgit v1.2.3


From c9029c341da646ab0c9911ea4c118eaa0a2eb0fa Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 20 Sep 2013 14:52:45 +1000
Subject: KVM: PPC: Book3S PR: Use 64k host pages where possible

Currently, PR KVM uses 4k pages for the host-side mappings of guest
memory, regardless of the host page size.  When the host page size is
64kB, we might as well use 64k host page mappings for guest mappings
of 64kB and larger pages and for guest real-mode mappings.  However,
the magic page has to remain a 4k page.

To implement this, we first add another flag bit to the guest VSID
values we use, to indicate that this segment is one where host pages
should be mapped using 64k pages.  For segments with this bit set
we set the bits in the shadow SLB entry to indicate a 64k base page
size.  When faulting in host HPTEs for this segment, we make them
64k HPTEs instead of 4k.  We record the pagesize in struct hpte_cache
for use when invalidating the HPTE.

For now we restrict the segment containing the magic page (if any) to
4k pages.  It should be possible to lift this restriction in future
by ensuring that the magic 4k page is appropriately positioned within
a host 64k page.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s.h |  6 ++++--
 arch/powerpc/kvm/book3s_32_mmu.c      |  1 +
 arch/powerpc/kvm/book3s_64_mmu.c      | 35 ++++++++++++++++++++++++++++++-----
 arch/powerpc/kvm/book3s_64_mmu_host.c | 27 +++++++++++++++++++++------
 arch/powerpc/kvm/book3s_pr.c          |  1 +
 5 files changed, 57 insertions(+), 13 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 1d4a1202e2d5..6bf20b4a2841 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -66,6 +66,7 @@ struct hpte_cache {
 	u64 pfn;
 	ulong slot;
 	struct kvmppc_pte pte;
+	int pagesize;
 };
 
 struct kvmppc_vcpu_book3s {
@@ -113,8 +114,9 @@ struct kvmppc_vcpu_book3s {
 #define CONTEXT_GUEST		1
 #define CONTEXT_GUEST_END	2
 
-#define VSID_REAL	0x0fffffffffc00000ULL
-#define VSID_BAT	0x0fffffffffb00000ULL
+#define VSID_REAL	0x07ffffffffc00000ULL
+#define VSID_BAT	0x07ffffffffb00000ULL
+#define VSID_64K	0x0800000000000000ULL
 #define VSID_1T		0x1000000000000000ULL
 #define VSID_REAL_DR	0x2000000000000000ULL
 #define VSID_REAL_IR	0x4000000000000000ULL
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index c8cefdd15fd8..af045533e685 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -308,6 +308,7 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	ulong mp_ea = vcpu->arch.magic_page_ea;
 
 	pte->eaddr = eaddr;
+	pte->page_size = MMU_PAGE_4K;
 
 	/* Magic page override */
 	if (unlikely(mp_ea) &&
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 8277264a0bc5..ffcde01cb995 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -542,6 +542,16 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
 	kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask);
 }
 
+#ifdef CONFIG_PPC_64K_PAGES
+static int segment_contains_magic_page(struct kvm_vcpu *vcpu, ulong esid)
+{
+	ulong mp_ea = vcpu->arch.magic_page_ea;
+
+	return mp_ea && !(vcpu->arch.shared->msr & MSR_PR) &&
+		(mp_ea >> SID_SHIFT) == esid;
+}
+#endif
+
 static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
 					     u64 *vsid)
 {
@@ -549,11 +559,13 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
 	struct kvmppc_slb *slb;
 	u64 gvsid = esid;
 	ulong mp_ea = vcpu->arch.magic_page_ea;
+	int pagesize = MMU_PAGE_64K;
 
 	if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
 		slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
 		if (slb) {
 			gvsid = slb->vsid;
+			pagesize = slb->base_page_size;
 			if (slb->tb) {
 				gvsid <<= SID_SHIFT_1T - SID_SHIFT;
 				gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1);
@@ -564,28 +576,41 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
 
 	switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
 	case 0:
-		*vsid = VSID_REAL | esid;
+		gvsid = VSID_REAL | esid;
 		break;
 	case MSR_IR:
-		*vsid = VSID_REAL_IR | gvsid;
+		gvsid |= VSID_REAL_IR;
 		break;
 	case MSR_DR:
-		*vsid = VSID_REAL_DR | gvsid;
+		gvsid |= VSID_REAL_DR;
 		break;
 	case MSR_DR|MSR_IR:
 		if (!slb)
 			goto no_slb;
 
-		*vsid = gvsid;
 		break;
 	default:
 		BUG();
 		break;
 	}
 
+#ifdef CONFIG_PPC_64K_PAGES
+	/*
+	 * Mark this as a 64k segment if the host is using
+	 * 64k pages, the host MMU supports 64k pages and
+	 * the guest segment page size is >= 64k,
+	 * but not if this segment contains the magic page.
+	 */
+	if (pagesize >= MMU_PAGE_64K &&
+	    mmu_psize_defs[MMU_PAGE_64K].shift &&
+	    !segment_contains_magic_page(vcpu, esid))
+		gvsid |= VSID_64K;
+#endif
+
 	if (vcpu->arch.shared->msr & MSR_PR)
-		*vsid |= VSID_PR;
+		gvsid |= VSID_PR;
 
+	*vsid = gvsid;
 	return 0;
 
 no_slb:
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index e5240524bf6c..6bda504ceda7 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -34,7 +34,7 @@
 void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 {
 	ppc_md.hpte_invalidate(pte->slot, pte->host_vpn,
-			       MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M,
+			       pte->pagesize, pte->pagesize, MMU_SEGSIZE_256M,
 			       false);
 }
 
@@ -90,6 +90,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 	int attempt = 0;
 	struct kvmppc_sid_map *map;
 	int r = 0;
+	int hpsize = MMU_PAGE_4K;
 
 	/* Get host physical address for gpa */
 	hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
@@ -99,7 +100,6 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 		goto out;
 	}
 	hpaddr <<= PAGE_SHIFT;
-	hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
 
 	/* and write the mapping ea -> hpa into the pt */
 	vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid);
@@ -117,8 +117,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 		goto out;
 	}
 
-	vsid = map->host_vsid;
-	vpn = hpt_vpn(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
+	vpn = hpt_vpn(orig_pte->eaddr, map->host_vsid, MMU_SEGSIZE_256M);
 
 	if (!orig_pte->may_write)
 		rflags |= HPTE_R_PP;
@@ -130,7 +129,16 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 	else
 		kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT);
 
-	hash = hpt_hash(vpn, PTE_SIZE, MMU_SEGSIZE_256M);
+	/*
+	 * Use 64K pages if possible; otherwise, on 64K page kernels,
+	 * we need to transfer 4 more bits from guest real to host real addr.
+	 */
+	if (vsid & VSID_64K)
+		hpsize = MMU_PAGE_64K;
+	else
+		hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
+
+	hash = hpt_hash(vpn, mmu_psize_defs[hpsize].shift, MMU_SEGSIZE_256M);
 
 map_again:
 	hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
@@ -143,7 +151,7 @@ map_again:
 		}
 
 	ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
-				 MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M);
+				 hpsize, hpsize, MMU_SEGSIZE_256M);
 
 	if (ret < 0) {
 		/* If we couldn't map a primary PTE, try a secondary */
@@ -168,6 +176,7 @@ map_again:
 		pte->host_vpn = vpn;
 		pte->pte = *orig_pte;
 		pte->pfn = hpaddr >> PAGE_SHIFT;
+		pte->pagesize = hpsize;
 
 		kvmppc_mmu_hpte_cache_map(vcpu, pte);
 	}
@@ -291,6 +300,12 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
 	slb_vsid &= ~SLB_VSID_KP;
 	slb_esid |= slb_index;
 
+#ifdef CONFIG_PPC_64K_PAGES
+	/* Set host segment base page size to 64K if possible */
+	if (gvsid & VSID_64K)
+		slb_vsid |= mmu_psize_defs[MMU_PAGE_64K].sllp;
+#endif
+
 	svcpu->slb[slb_index].esid = slb_esid;
 	svcpu->slb[slb_index].vsid = slb_vsid;
 
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 6cc99583ed39..e9e8c748e673 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -422,6 +422,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		pte.raddr = eaddr & KVM_PAM;
 		pte.eaddr = eaddr;
 		pte.vpage = eaddr >> 12;
+		pte.page_size = MMU_PAGE_64K;
 	}
 
 	switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
-- 
cgit v1.2.3


From 9308ab8e2da933d895ebbb903bf459e33ed94dec Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 20 Sep 2013 14:52:48 +1000
Subject: KVM: PPC: Book3S PR: Make HPT accesses and updates SMP-safe

This adds a per-VM mutex to provide mutual exclusion between vcpus
for accesses to and updates of the guest hashed page table (HPT).
This also makes the code use single-byte writes to the HPT entry
when updating the reference (R) and change (C) bits.  The reason
for doing this, rather than writing back the whole HPTE, is that on
non-PAPR virtual machines, the guest OS might be writing to the HPTE
concurrently, and writing back the whole HPTE might conflict with
that.  Also, real hardware does single-byte writes to update R and C.

The new mutex is taken in kvmppc_mmu_book3s_64_xlate() when reading
the HPT and updating R and/or C, and in the PAPR HPT update hcalls
(H_ENTER, H_REMOVE, etc.).  Having the mutex means that we don't need
to use a hypervisor lock bit in the HPT update hcalls, and we don't
need to be careful about the order in which the bytes of the HPTE are
updated by those hcalls.

The other change here is to make emulated TLB invalidations (tlbie)
effective across all vcpus.  To do this we call kvmppc_mmu_pte_vflush
for all vcpus in kvmppc_mmu_book3s_64_tlbie().

For 32-bit, this makes the setting of the accessed and dirty bits use
single-byte writes, and makes tlbie invalidate shadow HPTEs for all
vcpus.

With this, PR KVM can successfully run SMP guests.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_host.h |  3 +++
 arch/powerpc/kvm/book3s_32_mmu.c    | 36 ++++++++++++++++++++++--------------
 arch/powerpc/kvm/book3s_64_mmu.c    | 33 +++++++++++++++++++++++----------
 arch/powerpc/kvm/book3s_pr.c        |  1 +
 arch/powerpc/kvm/book3s_pr_papr.c   | 33 +++++++++++++++++++++++----------
 5 files changed, 72 insertions(+), 34 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 3d8b8a8921f0..0fe48729d07d 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -254,6 +254,9 @@ struct kvm_arch {
 	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
 	int hpt_cma_alloc;
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
+#ifdef CONFIG_KVM_BOOK3S_PR
+	struct mutex hpt_mutex;
+#endif
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct list_head spapr_tce_tables;
 	struct list_head rtas_tokens;
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index af045533e685..856af988ad59 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -271,19 +271,22 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
 	/* Update PTE C and A bits, so the guest's swapper knows we used the
 	   page */
 	if (found) {
-		u32 oldpte = pteg[i+1];
-
-		if (pte->may_read)
-			pteg[i+1] |= PTEG_FLAG_ACCESSED;
-		if (pte->may_write)
-			pteg[i+1] |= PTEG_FLAG_DIRTY;
-		else
-			dprintk_pte("KVM: Mapping read-only page!\n");
-
-		/* Write back into the PTEG */
-		if (pteg[i+1] != oldpte)
-			copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
-
+		u32 pte_r = pteg[i+1];
+		char __user *addr = (char __user *) &pteg[i+1];
+
+		/*
+		 * Use single-byte writes to update the HPTE, to
+		 * conform to what real hardware does.
+		 */
+		if (pte->may_read && !(pte_r & PTEG_FLAG_ACCESSED)) {
+			pte_r |= PTEG_FLAG_ACCESSED;
+			put_user(pte_r >> 8, addr + 2);
+		}
+		if (pte->may_write && !(pte_r & PTEG_FLAG_DIRTY)) {
+			/* XXX should only set this for stores */
+			pte_r |= PTEG_FLAG_DIRTY;
+			put_user(pte_r, addr + 3);
+		}
 		return 0;
 	}
 
@@ -348,7 +351,12 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
 
 static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)
 {
-	kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000);
+	int i;
+	struct kvm_vcpu *v;
+
+	/* flush this VA on all cpus */
+	kvm_for_each_vcpu(i, v, vcpu->kvm)
+		kvmppc_mmu_pte_flush(v, ea, 0x0FFFF000);
 }
 
 static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 9e6e11270b70..ad9ecfd29c4c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -257,6 +257,8 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 
 	pgsize = slbe->large ? MMU_PAGE_16M : MMU_PAGE_4K;
 
+	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
+
 do_second:
 	ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
 	if (kvm_is_error_hva(ptegp))
@@ -332,30 +334,37 @@ do_second:
 
 	/* Update PTE R and C bits, so the guest's swapper knows we used the
 	 * page */
-	if (gpte->may_read) {
-		/* Set the accessed flag */
+	if (gpte->may_read && !(r & HPTE_R_R)) {
+		/*
+		 * Set the accessed flag.
+		 * We have to write this back with a single byte write
+		 * because another vcpu may be accessing this on
+		 * non-PAPR platforms such as mac99, and this is
+		 * what real hardware does.
+		 */
+		char __user *addr = (char __user *) &pteg[i+1];
 		r |= HPTE_R_R;
+		put_user(r >> 8, addr + 6);
 	}
-	if (data && gpte->may_write) {
+	if (data && gpte->may_write && !(r & HPTE_R_C)) {
 		/* Set the dirty flag -- XXX even if not writing */
+		/* Use a single byte write */
+		char __user *addr = (char __user *) &pteg[i+1];
 		r |= HPTE_R_C;
+		put_user(r, addr + 7);
 	}
 
-	/* Write back into the PTEG */
-	if (pteg[i+1] != r) {
-		pteg[i+1] = r;
-		copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
-	}
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
 
 	if (!gpte->may_read)
 		return -EPERM;
 	return 0;
 
 no_page_found:
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
 	return -ENOENT;
 
 no_seg_found:
-
 	dprintk("KVM MMU: Trigger segment fault\n");
 	return -EINVAL;
 }
@@ -520,6 +529,8 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
 				       bool large)
 {
 	u64 mask = 0xFFFFFFFFFULL;
+	long i;
+	struct kvm_vcpu *v;
 
 	dprintk("KVM MMU: tlbie(0x%lx)\n", va);
 
@@ -542,7 +553,9 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
 		if (large)
 			mask = 0xFFFFFF000ULL;
 	}
-	kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask);
+	/* flush this VA on all vcpus */
+	kvm_for_each_vcpu(i, v, vcpu->kvm)
+		kvmppc_mmu_pte_vflush(v, va >> 12, mask);
 }
 
 #ifdef CONFIG_PPC_64K_PAGES
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index e9e8c748e673..4fa73c3f5713 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1422,6 +1422,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
 	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
 #endif
+	mutex_init(&kvm->arch.hpt_mutex);
 
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
 		spin_lock(&kvm_global_user_count_lock);
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index 38f189975fe1..5efa97b993d8 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -48,6 +48,7 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
 	pte_index &= ~7UL;
 	pteg_addr = get_pteg_addr(vcpu, pte_index);
 
+	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
 	copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
 	hpte = pteg;
 
@@ -74,6 +75,7 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
 	ret = H_SUCCESS;
 
  done:
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
 	kvmppc_set_gpr(vcpu, 3, ret);
 
 	return EMULATE_DONE;
@@ -86,26 +88,31 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
 	unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
 	unsigned long v = 0, pteg, rb;
 	unsigned long pte[2];
+	long int ret;
 
 	pteg = get_pteg_addr(vcpu, pte_index);
+	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
 	copy_from_user(pte, (void __user *)pteg, sizeof(pte));
 
+	ret = H_NOT_FOUND;
 	if ((pte[0] & HPTE_V_VALID) == 0 ||
 	    ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) ||
-	    ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) {
-		kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
-		return EMULATE_DONE;
-	}
+	    ((flags & H_ANDCOND) && (pte[0] & avpn) != 0))
+		goto done;
 
 	copy_to_user((void __user *)pteg, &v, sizeof(v));
 
 	rb = compute_tlbie_rb(pte[0], pte[1], pte_index);
 	vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
 
-	kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+	ret = H_SUCCESS;
 	kvmppc_set_gpr(vcpu, 4, pte[0]);
 	kvmppc_set_gpr(vcpu, 5, pte[1]);
 
+ done:
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+	kvmppc_set_gpr(vcpu, 3, ret);
+
 	return EMULATE_DONE;
 }
 
@@ -133,6 +140,7 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
 	int paramnr = 4;
 	int ret = H_SUCCESS;
 
+	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
 	for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
 		unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i));
 		unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1);
@@ -181,6 +189,7 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
 		}
 		kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
 	}
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
 	kvmppc_set_gpr(vcpu, 3, ret);
 
 	return EMULATE_DONE;
@@ -193,15 +202,16 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
 	unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
 	unsigned long rb, pteg, r, v;
 	unsigned long pte[2];
+	long int ret;
 
 	pteg = get_pteg_addr(vcpu, pte_index);
+	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
 	copy_from_user(pte, (void __user *)pteg, sizeof(pte));
 
+	ret = H_NOT_FOUND;
 	if ((pte[0] & HPTE_V_VALID) == 0 ||
-	    ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) {
-		kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
-		return EMULATE_DONE;
-	}
+	    ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn))
+		goto done;
 
 	v = pte[0];
 	r = pte[1];
@@ -216,8 +226,11 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
 	rb = compute_tlbie_rb(v, r, pte_index);
 	vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
 	copy_to_user((void __user *)pteg, pte, sizeof(pte));
+	ret = H_SUCCESS;
 
-	kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+ done:
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+	kvmppc_set_gpr(vcpu, 3, ret);
 
 	return EMULATE_DONE;
 }
-- 
cgit v1.2.3


From 3ff955024d186c512ee91263df9c850d6ae34a12 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 20 Sep 2013 14:52:49 +1000
Subject: KVM: PPC: Book3S PR: Allocate kvm_vcpu structs from kvm_vcpu_cache

This makes PR KVM allocate its kvm_vcpu structs from the kvm_vcpu_cache
rather than having them embedded in the kvmppc_vcpu_book3s struct,
which is allocated with vzalloc.  The reason is to reduce the
differences between PR and HV KVM in order to make it easier to have
them coexist in one kernel binary.

With this, the kvm_vcpu struct has a pointer to the kvmppc_vcpu_book3s
struct.  The pointer to the kvmppc_book3s_shadow_vcpu struct has moved
from the kvmppc_vcpu_book3s struct to the kvm_vcpu struct, and is only
present for 32-bit, since it is only used for 32-bit.

Signed-off-by: Paul Mackerras <paulus@samba.org>
[agraf: squash in compile fix from Aneesh]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s.h    |  4 +---
 arch/powerpc/include/asm/kvm_book3s_32.h |  2 +-
 arch/powerpc/include/asm/kvm_host.h      |  7 +++++++
 arch/powerpc/kvm/book3s_32_mmu.c         |  8 ++++----
 arch/powerpc/kvm/book3s_64_mmu.c         | 11 +++++------
 arch/powerpc/kvm/book3s_pr.c             | 33 ++++++++++++++++++++------------
 6 files changed, 39 insertions(+), 26 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 6bf20b4a2841..603fba494a0b 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -70,8 +70,6 @@ struct hpte_cache {
 };
 
 struct kvmppc_vcpu_book3s {
-	struct kvm_vcpu vcpu;
-	struct kvmppc_book3s_shadow_vcpu *shadow_vcpu;
 	struct kvmppc_sid_map sid_map[SID_MAP_NUM];
 	struct {
 		u64 esid;
@@ -194,7 +192,7 @@ extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
 
 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
 {
-	return container_of(vcpu, struct kvmppc_vcpu_book3s, vcpu);
+	return vcpu->arch.book3s;
 }
 
 extern void kvm_return_point(void);
diff --git a/arch/powerpc/include/asm/kvm_book3s_32.h b/arch/powerpc/include/asm/kvm_book3s_32.h
index ce0ef6ce8f86..c720e0b3238d 100644
--- a/arch/powerpc/include/asm/kvm_book3s_32.h
+++ b/arch/powerpc/include/asm/kvm_book3s_32.h
@@ -22,7 +22,7 @@
 
 static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
 {
-	return to_book3s(vcpu)->shadow_vcpu;
+	return vcpu->arch.shadow_vcpu;
 }
 
 static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 0fe48729d07d..404dbc81434d 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -86,6 +86,9 @@ struct lppaca;
 struct slb_shadow;
 struct dtl_entry;
 
+struct kvmppc_vcpu_book3s;
+struct kvmppc_book3s_shadow_vcpu;
+
 struct kvm_vm_stat {
 	u32 remote_tlb_flush;
 };
@@ -408,6 +411,10 @@ struct kvm_vcpu_arch {
 	int slb_max;		/* 1 + index of last valid entry in slb[] */
 	int slb_nr;		/* total number of entries in SLB */
 	struct kvmppc_mmu mmu;
+	struct kvmppc_vcpu_book3s *book3s;
+#endif
+#ifdef CONFIG_PPC_BOOK3S_32
+	struct kvmppc_book3s_shadow_vcpu *shadow_vcpu;
 #endif
 
 	ulong gpr[32];
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index 856af988ad59..b14af6d09347 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -111,10 +111,11 @@ static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
 	kvmppc_set_msr(vcpu, 0);
 }
 
-static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s,
+static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu,
 				      u32 sre, gva_t eaddr,
 				      bool primary)
 {
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
 	u32 page, hash, pteg, htabmask;
 	hva_t r;
 
@@ -132,7 +133,7 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3
 		kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg,
 		sr_vsid(sre));
 
-	r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
+	r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
 	if (kvm_is_error_hva(r))
 		return r;
 	return r | (pteg & ~PAGE_MASK);
@@ -203,7 +204,6 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
 				     struct kvmppc_pte *pte, bool data,
 				     bool primary)
 {
-	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
 	u32 sre;
 	hva_t ptegp;
 	u32 pteg[16];
@@ -218,7 +218,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
 
 	pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
 
-	ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu_book3s, sre, eaddr, primary);
+	ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu, sre, eaddr, primary);
 	if (kvm_is_error_hva(ptegp)) {
 		printk(KERN_INFO "KVM: Invalid PTEG!\n");
 		goto no_page_found;
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index ad9ecfd29c4c..c110145522e6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -130,11 +130,11 @@ static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
 	return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p);
 }
 
-static hva_t kvmppc_mmu_book3s_64_get_pteg(
-				struct kvmppc_vcpu_book3s *vcpu_book3s,
+static hva_t kvmppc_mmu_book3s_64_get_pteg(struct kvm_vcpu *vcpu,
 				struct kvmppc_slb *slbe, gva_t eaddr,
 				bool second)
 {
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
 	u64 hash, pteg, htabsize;
 	u32 ssize;
 	hva_t r;
@@ -159,10 +159,10 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg(
 
 	/* When running a PAPR guest, SDR1 contains a HVA address instead
            of a GPA */
-	if (vcpu_book3s->vcpu.arch.papr_enabled)
+	if (vcpu->arch.papr_enabled)
 		r = pteg;
 	else
-		r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
+		r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
 
 	if (kvm_is_error_hva(r))
 		return r;
@@ -208,7 +208,6 @@ static int decode_pagesize(struct kvmppc_slb *slbe, u64 r)
 static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 				struct kvmppc_pte *gpte, bool data)
 {
-	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
 	struct kvmppc_slb *slbe;
 	hva_t ptegp;
 	u64 pteg[16];
@@ -260,7 +259,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
 
 do_second:
-	ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
+	ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu, slbe, eaddr, second);
 	if (kvm_is_error_hva(ptegp))
 		goto no_page_found;
 
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 4fa73c3f5713..677d7e33b1ff 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -66,7 +66,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 #endif
 	vcpu->cpu = smp_processor_id();
 #ifdef CONFIG_PPC_BOOK3S_32
-	current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
+	current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu;
 #endif
 }
 
@@ -1125,17 +1125,22 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	int err = -ENOMEM;
 	unsigned long p;
 
+	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+	if (!vcpu)
+		goto out;
+
 	vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
 	if (!vcpu_book3s)
-		goto out;
+		goto free_vcpu;
+	vcpu->arch.book3s = vcpu_book3s;
 
 #ifdef CONFIG_KVM_BOOK3S_32
-	vcpu_book3s->shadow_vcpu =
-		kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
-	if (!vcpu_book3s->shadow_vcpu)
-		goto free_vcpu;
+	vcpu->arch.shadow_vcpu =
+		kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL);
+	if (!vcpu->arch.shadow_vcpu)
+		goto free_vcpu3s;
 #endif
-	vcpu = &vcpu_book3s->vcpu;
+
 	err = kvm_vcpu_init(vcpu, kvm, id);
 	if (err)
 		goto free_shadow_vcpu;
@@ -1175,10 +1180,12 @@ uninit_vcpu:
 	kvm_vcpu_uninit(vcpu);
 free_shadow_vcpu:
 #ifdef CONFIG_KVM_BOOK3S_32
-	kfree(vcpu_book3s->shadow_vcpu);
-free_vcpu:
+	kfree(vcpu->arch.shadow_vcpu);
+free_vcpu3s:
 #endif
 	vfree(vcpu_book3s);
+free_vcpu:
+	kmem_cache_free(kvm_vcpu_cache, vcpu);
 out:
 	return ERR_PTR(err);
 }
@@ -1189,8 +1196,11 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 
 	free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
 	kvm_vcpu_uninit(vcpu);
-	kfree(vcpu_book3s->shadow_vcpu);
+#ifdef CONFIG_KVM_BOOK3S_32
+	kfree(vcpu->arch.shadow_vcpu);
+#endif
 	vfree(vcpu_book3s);
+	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
@@ -1452,8 +1462,7 @@ static int kvmppc_book3s_init(void)
 {
 	int r;
 
-	r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
-		     THIS_MODULE);
+	r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
 
 	if (r)
 		return r;
-- 
cgit v1.2.3


From 93b159b466bdc9753bba5c3c51b40d7ddbbcc07c Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 20 Sep 2013 14:52:51 +1000
Subject: KVM: PPC: Book3S PR: Better handling of host-side read-only pages

Currently we request write access to all pages that get mapped into the
guest, even if the guest is only loading from the page.  This reduces
the effectiveness of KSM because it means that we unshare every page we
access.  Also, we always set the changed (C) bit in the guest HPTE if
it allows writing, even for a guest load.

This fixes both these problems.  We pass an 'iswrite' flag to the
mmu.xlate() functions and to kvmppc_mmu_map_page() to indicate whether
the access is a load or a store.  The mmu.xlate() functions now only
set C for stores.  kvmppc_gfn_to_pfn() now calls gfn_to_pfn_prot()
instead of gfn_to_pfn() so that it can indicate whether we need write
access to the page, and get back a 'writable' flag to indicate whether
the page is writable or not.  If that 'writable' flag is clear, we then
make the host HPTE read-only even if the guest HPTE allowed writing.

This means that we can get a protection fault when the guest writes to a
page that it has mapped read-write but which is read-only on the host
side (perhaps due to KSM having merged the page).  Thus we now call
kvmppc_handle_pagefault() for protection faults as well as HPTE not found
faults.  In kvmppc_handle_pagefault(), if the access was allowed by the
guest HPTE and we thus need to install a new host HPTE, we then need to
remove the old host HPTE if there is one.  This is done with a new
function, kvmppc_mmu_unmap_page(), which uses kvmppc_mmu_pte_vflush() to
find and remove the old host HPTE.

Since the memslot-related functions require the KVM SRCU read lock to
be held, this adds srcu_read_lock/unlock pairs around the calls to
kvmppc_handle_pagefault().

Finally, this changes kvmppc_mmu_book3s_32_xlate_pte() to not ignore
guest HPTEs that don't permit access, and to return -EPERM for accesses
that are not permitted by the page protections.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s.h |  7 +++++--
 arch/powerpc/include/asm/kvm_host.h   |  3 ++-
 arch/powerpc/kvm/book3s.c             | 15 +++++++++------
 arch/powerpc/kvm/book3s_32_mmu.c      | 32 +++++++++++++++++---------------
 arch/powerpc/kvm/book3s_32_mmu_host.c | 14 +++++++++++---
 arch/powerpc/kvm/book3s_64_mmu.c      |  9 +++++----
 arch/powerpc/kvm/book3s_64_mmu_host.c | 20 +++++++++++++++++---
 arch/powerpc/kvm/book3s_64_mmu_hv.c   |  2 +-
 arch/powerpc/kvm/book3s_pr.c          | 29 ++++++++++++++++++++++++-----
 9 files changed, 91 insertions(+), 40 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 603fba494a0b..a07bd7e7d4a4 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -128,7 +128,9 @@ extern void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr);
 extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu);
-extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
+extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte,
+			       bool iswrite);
+extern void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
 extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
 extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size);
 extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
@@ -157,7 +159,8 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
 			   bool upper, u32 val);
 extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
 extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
-extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing,
+			bool *writable);
 extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 			unsigned long *rmap, long pte_index, int realmode);
 extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 404dbc81434d..b6881917cd84 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -348,7 +348,8 @@ struct kvmppc_mmu {
 	/* book3s */
 	void (*mtsrin)(struct kvm_vcpu *vcpu, u32 srnum, ulong value);
 	u32  (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum);
-	int  (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data);
+	int  (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr,
+		      struct kvmppc_pte *pte, bool data, bool iswrite);
 	void (*reset_msr)(struct kvm_vcpu *vcpu);
 	void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large);
 	int  (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid);
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index f97369dc457c..807103ad2628 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -286,7 +286,8 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing,
+			bool *writable)
 {
 	ulong mp_pa = vcpu->arch.magic_page_pa;
 
@@ -302,20 +303,22 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
 
 		pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT;
 		get_page(pfn_to_page(pfn));
+		if (writable)
+			*writable = true;
 		return pfn;
 	}
 
-	return gfn_to_pfn(vcpu->kvm, gfn);
+	return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable);
 }
 
 static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
-			 struct kvmppc_pte *pte)
+			bool iswrite, struct kvmppc_pte *pte)
 {
 	int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR));
 	int r;
 
 	if (relocated) {
-		r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data);
+		r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data, iswrite);
 	} else {
 		pte->eaddr = eaddr;
 		pte->raddr = eaddr & KVM_PAM;
@@ -361,7 +364,7 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
 
 	vcpu->stat.st++;
 
-	if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
+	if (kvmppc_xlate(vcpu, *eaddr, data, true, &pte))
 		return -ENOENT;
 
 	*eaddr = pte.raddr;
@@ -383,7 +386,7 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
 
 	vcpu->stat.ld++;
 
-	if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
+	if (kvmppc_xlate(vcpu, *eaddr, data, false, &pte))
 		goto nopte;
 
 	*eaddr = pte.raddr;
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index b14af6d09347..76a64ce6a5b6 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -84,7 +84,8 @@ static inline bool sr_nx(u32 sr_raw)
 }
 
 static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
-					  struct kvmppc_pte *pte, bool data);
+					  struct kvmppc_pte *pte, bool data,
+					  bool iswrite);
 static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
 					     u64 *vsid);
 
@@ -99,7 +100,7 @@ static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
 	u64 vsid;
 	struct kvmppc_pte pte;
 
-	if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data))
+	if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data, false))
 		return pte.vpage;
 
 	kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
@@ -146,7 +147,8 @@ static u32 kvmppc_mmu_book3s_32_get_ptem(u32 sre, gva_t eaddr, bool primary)
 }
 
 static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
-					  struct kvmppc_pte *pte, bool data)
+					  struct kvmppc_pte *pte, bool data,
+					  bool iswrite)
 {
 	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
 	struct kvmppc_bat *bat;
@@ -187,8 +189,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
 				printk(KERN_INFO "BAT is not readable!\n");
 				continue;
 			}
-			if (!pte->may_write) {
-				/* let's treat r/o BATs as not-readable for now */
+			if (iswrite && !pte->may_write) {
 				dprintk_pte("BAT is read-only!\n");
 				continue;
 			}
@@ -202,7 +203,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
 
 static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
 				     struct kvmppc_pte *pte, bool data,
-				     bool primary)
+				     bool iswrite, bool primary)
 {
 	u32 sre;
 	hva_t ptegp;
@@ -258,9 +259,6 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
 					break;
 			}
 
-			if ( !pte->may_read )
-				continue;
-
 			dprintk_pte("MMU: Found PTE -> %x %x - %x\n",
 				    pteg[i], pteg[i+1], pp);
 			found = 1;
@@ -282,11 +280,12 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
 			pte_r |= PTEG_FLAG_ACCESSED;
 			put_user(pte_r >> 8, addr + 2);
 		}
-		if (pte->may_write && !(pte_r & PTEG_FLAG_DIRTY)) {
-			/* XXX should only set this for stores */
+		if (iswrite && pte->may_write && !(pte_r & PTEG_FLAG_DIRTY)) {
 			pte_r |= PTEG_FLAG_DIRTY;
 			put_user(pte_r, addr + 3);
 		}
+		if (!pte->may_read || (iswrite && !pte->may_write))
+			return -EPERM;
 		return 0;
 	}
 
@@ -305,7 +304,8 @@ no_page_found:
 }
 
 static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
-				      struct kvmppc_pte *pte, bool data)
+				      struct kvmppc_pte *pte, bool data,
+				      bool iswrite)
 {
 	int r;
 	ulong mp_ea = vcpu->arch.magic_page_ea;
@@ -327,11 +327,13 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 		return 0;
 	}
 
-	r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data);
+	r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data, iswrite);
 	if (r < 0)
-	       r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true);
+		r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte,
+						   data, iswrite, true);
 	if (r < 0)
-	       r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, false);
+		r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte,
+						   data, iswrite, false);
 
 	return r;
 }
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 00e619bf608e..673322329238 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -138,7 +138,8 @@ static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr,
 
 extern char etext[];
 
-int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
+int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
+			bool iswrite)
 {
 	pfn_t hpaddr;
 	u64 vpn;
@@ -152,9 +153,11 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 	bool evict = false;
 	struct hpte_cache *pte;
 	int r = 0;
+	bool writable;
 
 	/* Get host physical address for gpa */
-	hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
+	hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT,
+				   iswrite, &writable);
 	if (is_error_noslot_pfn(hpaddr)) {
 		printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
 				 orig_pte->eaddr);
@@ -204,7 +207,7 @@ next_pteg:
 		(primary ? 0 : PTE_SEC);
 	pteg1 = hpaddr | PTE_M | PTE_R | PTE_C;
 
-	if (orig_pte->may_write) {
+	if (orig_pte->may_write && writable) {
 		pteg1 |= PP_RWRW;
 		mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
 	} else {
@@ -259,6 +262,11 @@ out:
 	return r;
 }
 
+void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
+{
+	kvmppc_mmu_pte_vflush(vcpu, pte->vpage, 0xfffffffffULL);
+}
+
 static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
 {
 	struct kvmppc_sid_map *map;
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index c110145522e6..83da1f868fd5 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -206,7 +206,8 @@ static int decode_pagesize(struct kvmppc_slb *slbe, u64 r)
 }
 
 static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
-				struct kvmppc_pte *gpte, bool data)
+				      struct kvmppc_pte *gpte, bool data,
+				      bool iswrite)
 {
 	struct kvmppc_slb *slbe;
 	hva_t ptegp;
@@ -345,8 +346,8 @@ do_second:
 		r |= HPTE_R_R;
 		put_user(r >> 8, addr + 6);
 	}
-	if (data && gpte->may_write && !(r & HPTE_R_C)) {
-		/* Set the dirty flag -- XXX even if not writing */
+	if (iswrite && gpte->may_write && !(r & HPTE_R_C)) {
+		/* Set the dirty flag */
 		/* Use a single byte write */
 		char __user *addr = (char __user *) &pteg[i+1];
 		r |= HPTE_R_C;
@@ -355,7 +356,7 @@ do_second:
 
 	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
 
-	if (!gpte->may_read)
+	if (!gpte->may_read || (iswrite && !gpte->may_write))
 		return -EPERM;
 	return 0;
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 6bda504ceda7..cc9fb89b8884 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -78,7 +78,8 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
 	return NULL;
 }
 
-int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
+int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
+			bool iswrite)
 {
 	unsigned long vpn;
 	pfn_t hpaddr;
@@ -91,9 +92,11 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 	struct kvmppc_sid_map *map;
 	int r = 0;
 	int hpsize = MMU_PAGE_4K;
+	bool writable;
 
 	/* Get host physical address for gpa */
-	hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
+	hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT,
+				   iswrite, &writable);
 	if (is_error_noslot_pfn(hpaddr)) {
 		printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
 		r = -EINVAL;
@@ -119,7 +122,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 
 	vpn = hpt_vpn(orig_pte->eaddr, map->host_vsid, MMU_SEGSIZE_256M);
 
-	if (!orig_pte->may_write)
+	if (!orig_pte->may_write || !writable)
 		rflags |= HPTE_R_PP;
 	else
 		mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
@@ -186,6 +189,17 @@ out:
 	return r;
 }
 
+void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
+{
+	u64 mask = 0xfffffffffULL;
+	u64 vsid;
+
+	vcpu->arch.mmu.esid_to_vsid(vcpu, pte->eaddr >> SID_SHIFT, &vsid);
+	if (vsid & VSID_64K)
+		mask = 0xffffffff0ULL;
+	kvmppc_mmu_pte_vflush(vcpu, pte->vpage, mask);
+}
+
 static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
 {
 	struct kvmppc_sid_map *map;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index ccb89a048bf8..394fef820f0c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -451,7 +451,7 @@ static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
 }
 
 static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
-			struct kvmppc_pte *gpte, bool data)
+			struct kvmppc_pte *gpte, bool data, bool iswrite)
 {
 	struct kvm *kvm = vcpu->kvm;
 	struct kvmppc_slb *slbe;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 677d7e33b1ff..2f84ed807184 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -401,6 +401,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			    ulong eaddr, int vec)
 {
 	bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
+	bool iswrite = false;
 	int r = RESUME_GUEST;
 	int relocated;
 	int page_found = 0;
@@ -411,10 +412,12 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	u64 vsid;
 
 	relocated = data ? dr : ir;
+	if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE))
+		iswrite = true;
 
 	/* Resolve real address if translation turned on */
 	if (relocated) {
-		page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data);
+		page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite);
 	} else {
 		pte.may_execute = true;
 		pte.may_read = true;
@@ -475,12 +478,20 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
 	} else if (!is_mmio &&
 		   kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
+		if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
+			/*
+			 * There is already a host HPTE there, presumably
+			 * a read-only one for a page the guest thinks
+			 * is writable, so get rid of it first.
+			 */
+			kvmppc_mmu_unmap_page(vcpu, &pte);
+		}
 		/* The guest's PTE is not mapped yet. Map on the host */
-		kvmppc_mmu_map_page(vcpu, &pte);
+		kvmppc_mmu_map_page(vcpu, &pte, iswrite);
 		if (data)
 			vcpu->stat.sp_storage++;
 		else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
-			(!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
+			 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
 			kvmppc_patch_dcbz(vcpu, &pte);
 	} else {
 		/* MMIO */
@@ -732,7 +743,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 		/* only care about PTEG not found errors, but leave NX alone */
 		if (shadow_srr1 & 0x40000000) {
+			int idx = srcu_read_lock(&vcpu->kvm->srcu);
 			r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
+			srcu_read_unlock(&vcpu->kvm->srcu, idx);
 			vcpu->stat.sp_instruc++;
 		} else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
 			  (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
@@ -774,9 +787,15 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 #endif
 
-		/* The only case we need to handle is missing shadow PTEs */
-		if (fault_dsisr & DSISR_NOHPTE) {
+		/*
+		 * We need to handle missing shadow PTEs, and
+		 * protection faults due to us mapping a page read-only
+		 * when the guest thinks it is writable.
+		 */
+		if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) {
+			int idx = srcu_read_lock(&vcpu->kvm->srcu);
 			r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
+			srcu_read_unlock(&vcpu->kvm->srcu, idx);
 		} else {
 			vcpu->arch.shared->dar = dar;
 			vcpu->arch.shared->dsisr = fault_dsisr;
-- 
cgit v1.2.3


From d78bca72961ae816181b386ff6b347419dfcd5cf Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 20 Sep 2013 14:52:52 +1000
Subject: KVM: PPC: Book3S PR: Use mmu_notifier_retry() in
 kvmppc_mmu_map_page()

When the MM code is invalidating a range of pages, it calls the KVM
kvm_mmu_notifier_invalidate_range_start() notifier function, which calls
kvm_unmap_hva_range(), which arranges to flush all the existing host
HPTEs for guest pages.  However, the Linux PTEs for the range being
flushed are still valid at that point.  We are not supposed to establish
any new references to pages in the range until the ...range_end()
notifier gets called.  The PPC-specific KVM code doesn't get any
explicit notification of that; instead, we are supposed to use
mmu_notifier_retry() to test whether we are or have been inside a
range flush notifier pair while we have been getting a page and
instantiating a host HPTE for the page.

This therefore adds a call to mmu_notifier_retry inside
kvmppc_mmu_map_page().  This call is inside a region locked with
kvm->mmu_lock, which is the same lock that is taken by the KVM
MMU notifier functions, thus ensuring that no new notification can
proceed while we are in the locked region.  Inside this region we
also create the host HPTE and link the corresponding hpte_cache
structure into the lists used to find it later.  We cannot allocate
the hpte_cache structure inside this locked region because that can
lead to deadlock, so we allocate it outside the region and free it
if we end up not using it.

This also moves the updates of vcpu3s->hpte_cache_count inside the
regions locked with vcpu3s->mmu_lock, and does the increment in
kvmppc_mmu_hpte_cache_map() when the pte is added to the cache
rather than when it is allocated, in order that the hpte_cache_count
is accurate.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s.h |  1 +
 arch/powerpc/kvm/book3s_64_mmu_host.c | 37 ++++++++++++++++++++++++++---------
 arch/powerpc/kvm/book3s_mmu_hpte.c    | 14 +++++++++----
 3 files changed, 39 insertions(+), 13 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index a07bd7e7d4a4..0ec00f4fef91 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -142,6 +142,7 @@ extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
 
 extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
 extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte);
 extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu);
 extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index cc9fb89b8884..307e6e838e0d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -93,6 +93,13 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
 	int r = 0;
 	int hpsize = MMU_PAGE_4K;
 	bool writable;
+	unsigned long mmu_seq;
+	struct kvm *kvm = vcpu->kvm;
+	struct hpte_cache *cpte;
+
+	/* used to check for invalidations in progress */
+	mmu_seq = kvm->mmu_notifier_seq;
+	smp_rmb();
 
 	/* Get host physical address for gpa */
 	hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT,
@@ -143,6 +150,14 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
 
 	hash = hpt_hash(vpn, mmu_psize_defs[hpsize].shift, MMU_SEGSIZE_256M);
 
+	cpte = kvmppc_mmu_hpte_cache_next(vcpu);
+
+	spin_lock(&kvm->mmu_lock);
+	if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) {
+		r = -EAGAIN;
+		goto out_unlock;
+	}
+
 map_again:
 	hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
 
@@ -150,7 +165,7 @@ map_again:
 	if (attempt > 1)
 		if (ppc_md.hpte_remove(hpteg) < 0) {
 			r = -1;
-			goto out;
+			goto out_unlock;
 		}
 
 	ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
@@ -163,8 +178,6 @@ map_again:
 		attempt++;
 		goto map_again;
 	} else {
-		struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
-
 		trace_kvm_book3s_64_mmu_map(rflags, hpteg,
 					    vpn, hpaddr, orig_pte);
 
@@ -175,15 +188,21 @@ map_again:
 			hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
 		}
 
-		pte->slot = hpteg + (ret & 7);
-		pte->host_vpn = vpn;
-		pte->pte = *orig_pte;
-		pte->pfn = hpaddr >> PAGE_SHIFT;
-		pte->pagesize = hpsize;
+		cpte->slot = hpteg + (ret & 7);
+		cpte->host_vpn = vpn;
+		cpte->pte = *orig_pte;
+		cpte->pfn = hpaddr >> PAGE_SHIFT;
+		cpte->pagesize = hpsize;
 
-		kvmppc_mmu_hpte_cache_map(vcpu, pte);
+		kvmppc_mmu_hpte_cache_map(vcpu, cpte);
+		cpte = NULL;
 	}
+
+out_unlock:
+	spin_unlock(&kvm->mmu_lock);
 	kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
+	if (cpte)
+		kvmppc_mmu_hpte_cache_free(cpte);
 
 out:
 	return r;
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index d2d280b16778..6b79bfc44ba5 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -98,6 +98,8 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 			   &vcpu3s->hpte_hash_vpte_64k[index]);
 #endif
 
+	vcpu3s->hpte_cache_count++;
+
 	spin_unlock(&vcpu3s->mmu_lock);
 }
 
@@ -131,10 +133,10 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 #ifdef CONFIG_PPC_BOOK3S_64
 	hlist_del_init_rcu(&pte->list_vpte_64k);
 #endif
+	vcpu3s->hpte_cache_count--;
 
 	spin_unlock(&vcpu3s->mmu_lock);
 
-	vcpu3s->hpte_cache_count--;
 	call_rcu(&pte->rcu_head, free_pte_rcu);
 }
 
@@ -331,15 +333,19 @@ struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
 	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
 	struct hpte_cache *pte;
 
-	pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
-	vcpu3s->hpte_cache_count++;
-
 	if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM)
 		kvmppc_mmu_pte_flush_all(vcpu);
 
+	pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
+
 	return pte;
 }
 
+void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte)
+{
+	kmem_cache_free(hpte_cache, pte);
+}
+
 void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu)
 {
 	kvmppc_mmu_pte_flush(vcpu, 0, 0);
-- 
cgit v1.2.3


From 44a3add86311bb9d060d795bcdcdc9b8c7a35bd0 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 4 Oct 2013 21:45:04 +1000
Subject: KVM: PPC: Book3S HV: Better handling of exceptions that happen in
 real mode

When an interrupt or exception happens in the guest that comes to the
host, the CPU goes to hypervisor real mode (MMU off) to handle the
exception but doesn't change the MMU context.  After saving a few
registers, we then clear the "in guest" flag.  If, for any reason,
we get an exception in the real-mode code, that then gets handled
by the normal kernel exception handlers, which turn the MMU on.  This
is disastrous if the MMU is still set to the guest context, since we
end up executing instructions from random places in the guest kernel
with hypervisor privilege.

In order to catch this situation, we define a new value for the "in guest"
flag, KVM_GUEST_MODE_HOST_HV, to indicate that we are in hypervisor real
mode with guest MMU context.  If the "in guest" flag is set to this value,
we branch off to an emergency handler.  For the moment, this just does
a branch to self to stop the CPU from doing anything further.

While we're here, we define another new flag value to indicate that we
are in a HV guest, as distinct from a PR guest.  This will be useful
when we have a kernel that can support both PR and HV guests concurrently.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_asm.h      |  2 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 31 +++++++++++++++++++++++++------
 2 files changed, 27 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index e2d4d467ee93..1bd92fd43cfb 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -138,6 +138,8 @@
 #define KVM_GUEST_MODE_NONE	0
 #define KVM_GUEST_MODE_GUEST	1
 #define KVM_GUEST_MODE_SKIP	2
+#define KVM_GUEST_MODE_GUEST_HV	3
+#define KVM_GUEST_MODE_HOST_HV	4
 
 #define KVM_INST_FETCH_FAILED	-1
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 6e3370f42a63..84105eb18a0e 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -387,6 +387,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	mtspr	SPRN_DAR, r5
 	mtspr	SPRN_DSISR, r6
 
+	li	r6, KVM_GUEST_MODE_HOST_HV
+	stb	r6, HSTATE_IN_GUEST(r13)
+
 BEGIN_FTR_SECTION
 	/* Restore AMR and UAMOR, set AMOR to all 1s */
 	ld	r5,VCPU_AMR(r4)
@@ -690,7 +693,7 @@ fast_guest_return:
 	mtspr	SPRN_HSRR1,r11
 
 	/* Activate guest mode, so faults get handled by KVM */
-	li	r9, KVM_GUEST_MODE_GUEST
+	li	r9, KVM_GUEST_MODE_GUEST_HV
 	stb	r9, HSTATE_IN_GUEST(r13)
 
 	/* Enter guest */
@@ -750,6 +753,14 @@ kvmppc_interrupt:
 	 */
 	/* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */
 	std	r9, HSTATE_HOST_R2(r13)
+
+	lbz	r9, HSTATE_IN_GUEST(r13)
+	cmpwi	r9, KVM_GUEST_MODE_HOST_HV
+	beq	kvmppc_bad_host_intr
+	/* We're now back in the host but in guest MMU context */
+	li	r9, KVM_GUEST_MODE_HOST_HV
+	stb	r9, HSTATE_IN_GUEST(r13)
+
 	ld	r9, HSTATE_KVM_VCPU(r13)
 
 	/* Save registers */
@@ -801,10 +812,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	std	r3, VCPU_GPR(R13)(r9)
 	std	r4, VCPU_LR(r9)
 
-	/* Unset guest mode */
-	li	r0, KVM_GUEST_MODE_NONE
-	stb	r0, HSTATE_IN_GUEST(r13)
-
 	stw	r12,VCPU_TRAP(r9)
 
 	/* Save HEIR (HV emulation assist reg) in last_inst
@@ -1198,6 +1205,10 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_AMR,r6
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
+	/* Unset guest mode */
+	li	r0, KVM_GUEST_MODE_NONE
+	stb	r0, HSTATE_IN_GUEST(r13)
+
 	/* Switch DSCR back to host value */
 BEGIN_FTR_SECTION
 	mfspr	r8, SPRN_DSCR
@@ -1400,7 +1411,7 @@ fast_interrupt_c_return:
 	stw	r8, VCPU_LAST_INST(r9)
 
 	/* Unset guest mode. */
-	li	r0, KVM_GUEST_MODE_NONE
+	li	r0, KVM_GUEST_MODE_HOST_HV
 	stb	r0, HSTATE_IN_GUEST(r13)
 	b	guest_exit_cont
 
@@ -1949,3 +1960,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	lwz	r7,VCPU_VRSAVE(r4)
 	mtspr	SPRN_VRSAVE,r7
 	blr
+
+/*
+ * We come here if we get any exception or interrupt while we are
+ * executing host real mode code while in guest MMU context.
+ * For now just spin, but we should do something better.
+ */
+kvmppc_bad_host_intr:
+	b	.
-- 
cgit v1.2.3


From c199efa295fc99c029bcbd88ed4ded3334493e95 Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <r65777@freescale.com>
Date: Thu, 19 Sep 2013 11:32:41 +0530
Subject: powerpc: book3e: _PAGE_LENDIAN must be _PAGE_ENDIAN

For book3e, _PAGE_ENDIAN is not defined. In fact, what is defined
is "_PAGE_LENDIAN", which is wrong; it should be _PAGE_ENDIAN.
There are no compilation errors because
arch/powerpc/include/asm/pte-common.h defines _PAGE_ENDIAN to 0
when it is not defined anywhere else.

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/pte-book3e.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/pte-book3e.h b/arch/powerpc/include/asm/pte-book3e.h
index 0156702ba24e..576ad88104cb 100644
--- a/arch/powerpc/include/asm/pte-book3e.h
+++ b/arch/powerpc/include/asm/pte-book3e.h
@@ -40,7 +40,7 @@
 #define _PAGE_U1	0x010000
 #define _PAGE_U0	0x020000
 #define _PAGE_ACCESSED	0x040000
-#define _PAGE_LENDIAN	0x080000
+#define _PAGE_ENDIAN	0x080000
 #define _PAGE_GUARDED	0x100000
 #define _PAGE_COHERENT	0x200000 /* M: enforce memory coherence */
 #define _PAGE_NO_CACHE	0x400000 /* I: cache inhibit */
-- 
cgit v1.2.3


From 95791988fec645d196e746fcc0e329e19f7b1347 Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <r65777@freescale.com>
Date: Wed, 26 Jun 2013 11:12:22 +0530
Subject: powerpc: move debug registers in a structure

Move the debug registers out of thread_struct into their own
structure (struct debug_reg).  This way the same data type can be
shared with KVM, and it also helps in reusing other debug-related
functions.

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/processor.h |  38 +++++----
 arch/powerpc/include/asm/reg_booke.h |   8 +-
 arch/powerpc/kernel/asm-offsets.c    |   2 +-
 arch/powerpc/kernel/process.c        |  42 +++++-----
 arch/powerpc/kernel/ptrace.c         | 154 +++++++++++++++++------------------
 arch/powerpc/kernel/ptrace32.c       |   2 +-
 arch/powerpc/kernel/signal_32.c      |   6 +-
 arch/powerpc/kernel/traps.c          |  35 ++++----
 8 files changed, 147 insertions(+), 140 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index e378cccfca55..b43844442a6c 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -147,22 +147,7 @@ typedef struct {
 #define TS_FPR(i) fpr[i][TS_FPROFFSET]
 #define TS_TRANS_FPR(i) transact_fpr[i][TS_FPROFFSET]
 
-struct thread_struct {
-	unsigned long	ksp;		/* Kernel stack pointer */
-	unsigned long	ksp_limit;	/* if ksp <= ksp_limit stack overflow */
-
-#ifdef CONFIG_PPC64
-	unsigned long	ksp_vsid;
-#endif
-	struct pt_regs	*regs;		/* Pointer to saved register state */
-	mm_segment_t	fs;		/* for get_fs() validation */
-#ifdef CONFIG_BOOKE
-	/* BookE base exception scratch space; align on cacheline */
-	unsigned long	normsave[8] ____cacheline_aligned;
-#endif
-#ifdef CONFIG_PPC32
-	void		*pgdir;		/* root of page-table tree */
-#endif
+struct debug_reg {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 	/*
 	 * The following help to manage the use of Debug Control Registers
@@ -199,6 +184,27 @@ struct thread_struct {
 	unsigned long	dvc2;
 #endif
 #endif
+};
+
+struct thread_struct {
+	unsigned long	ksp;		/* Kernel stack pointer */
+	unsigned long	ksp_limit;	/* if ksp <= ksp_limit stack overflow */
+
+#ifdef CONFIG_PPC64
+	unsigned long	ksp_vsid;
+#endif
+	struct pt_regs	*regs;		/* Pointer to saved register state */
+	mm_segment_t	fs;		/* for get_fs() validation */
+#ifdef CONFIG_BOOKE
+	/* BookE base exception scratch space; align on cacheline */
+	unsigned long	normsave[8] ____cacheline_aligned;
+#endif
+#ifdef CONFIG_PPC32
+	void		*pgdir;		/* root of page-table tree */
+#endif
+	/* Debug Registers */
+	struct debug_reg debug;
+
 	/* FP and VSX 0-31 register set */
 	double		fpr[32][TS_FPRWIDTH] __attribute__((aligned(16)));
 	struct {
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index ed8f836da094..2e31aacd8acc 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -381,7 +381,7 @@
 #define DBCR0_IA34T	0x00004000	/* Instr Addr 3-4 range Toggle */
 #define DBCR0_FT	0x00000001	/* Freeze Timers on debug event */
 
-#define dbcr_iac_range(task)	((task)->thread.dbcr0)
+#define dbcr_iac_range(task)	((task)->thread.debug.dbcr0)
 #define DBCR_IAC12I	DBCR0_IA12			/* Range Inclusive */
 #define DBCR_IAC12X	(DBCR0_IA12 | DBCR0_IA12X)	/* Range Exclusive */
 #define DBCR_IAC12MODE	(DBCR0_IA12 | DBCR0_IA12X)	/* IAC 1-2 Mode Bits */
@@ -395,7 +395,7 @@
 #define DBCR1_DAC1W	0x20000000	/* DAC1 Write Debug Event */
 #define DBCR1_DAC2W	0x10000000	/* DAC2 Write Debug Event */
 
-#define dbcr_dac(task)	((task)->thread.dbcr1)
+#define dbcr_dac(task)	((task)->thread.debug.dbcr1)
 #define DBCR_DAC1R	DBCR1_DAC1R
 #define DBCR_DAC1W	DBCR1_DAC1W
 #define DBCR_DAC2R	DBCR1_DAC2R
@@ -441,7 +441,7 @@
 #define DBCR0_CRET	0x00000020	/* Critical Return Debug Event */
 #define DBCR0_FT	0x00000001	/* Freeze Timers on debug event */
 
-#define dbcr_dac(task)	((task)->thread.dbcr0)
+#define dbcr_dac(task)	((task)->thread.debug.dbcr0)
 #define DBCR_DAC1R	DBCR0_DAC1R
 #define DBCR_DAC1W	DBCR0_DAC1W
 #define DBCR_DAC2R	DBCR0_DAC2R
@@ -475,7 +475,7 @@
 #define DBCR1_IAC34MX	0x000000C0	/* Instr Addr 3-4 range eXclusive */
 #define DBCR1_IAC34AT	0x00000001	/* Instr Addr 3-4 range Toggle */
 
-#define dbcr_iac_range(task)	((task)->thread.dbcr1)
+#define dbcr_iac_range(task)	((task)->thread.debug.dbcr1)
 #define DBCR_IAC12I	DBCR1_IAC12M	/* Range Inclusive */
 #define DBCR_IAC12X	DBCR1_IAC12MX	/* Range Exclusive */
 #define DBCR_IAC12MODE	DBCR1_IAC12MX	/* IAC 1-2 Mode Bits */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index fda7f4020a33..95ba8095fc4a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -113,7 +113,7 @@ int main(void)
 #endif /* CONFIG_SPE */
 #endif /* CONFIG_PPC64 */
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-	DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0));
+	DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, debug.dbcr0));
 #endif
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
 	DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 8eb6e39b7343..a2b9231cfd48 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -314,28 +314,28 @@ static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk);
  */
 static void set_debug_reg_defaults(struct thread_struct *thread)
 {
-	thread->iac1 = thread->iac2 = 0;
+	thread->debug.iac1 = thread->debug.iac2 = 0;
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
-	thread->iac3 = thread->iac4 = 0;
+	thread->debug.iac3 = thread->debug.iac4 = 0;
 #endif
-	thread->dac1 = thread->dac2 = 0;
+	thread->debug.dac1 = thread->debug.dac2 = 0;
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
-	thread->dvc1 = thread->dvc2 = 0;
+	thread->debug.dvc1 = thread->debug.dvc2 = 0;
 #endif
-	thread->dbcr0 = 0;
+	thread->debug.dbcr0 = 0;
 #ifdef CONFIG_BOOKE
 	/*
 	 * Force User/Supervisor bits to b11 (user-only MSR[PR]=1)
 	 */
-	thread->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
+	thread->debug.dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
 			DBCR1_IAC3US | DBCR1_IAC4US;
 	/*
 	 * Force Data Address Compare User/Supervisor bits to be User-only
 	 * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0.
 	 */
-	thread->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+	thread->debug.dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
 #else
-	thread->dbcr1 = 0;
+	thread->debug.dbcr1 = 0;
 #endif
 }
 
@@ -348,22 +348,22 @@ static void prime_debug_regs(struct thread_struct *thread)
 	 */
 	mtmsr(mfmsr() & ~MSR_DE);
 
-	mtspr(SPRN_IAC1, thread->iac1);
-	mtspr(SPRN_IAC2, thread->iac2);
+	mtspr(SPRN_IAC1, thread->debug.iac1);
+	mtspr(SPRN_IAC2, thread->debug.iac2);
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
-	mtspr(SPRN_IAC3, thread->iac3);
-	mtspr(SPRN_IAC4, thread->iac4);
+	mtspr(SPRN_IAC3, thread->debug.iac3);
+	mtspr(SPRN_IAC4, thread->debug.iac4);
 #endif
-	mtspr(SPRN_DAC1, thread->dac1);
-	mtspr(SPRN_DAC2, thread->dac2);
+	mtspr(SPRN_DAC1, thread->debug.dac1);
+	mtspr(SPRN_DAC2, thread->debug.dac2);
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
-	mtspr(SPRN_DVC1, thread->dvc1);
-	mtspr(SPRN_DVC2, thread->dvc2);
+	mtspr(SPRN_DVC1, thread->debug.dvc1);
+	mtspr(SPRN_DVC2, thread->debug.dvc2);
 #endif
-	mtspr(SPRN_DBCR0, thread->dbcr0);
-	mtspr(SPRN_DBCR1, thread->dbcr1);
+	mtspr(SPRN_DBCR0, thread->debug.dbcr0);
+	mtspr(SPRN_DBCR1, thread->debug.dbcr1);
 #ifdef CONFIG_BOOKE
-	mtspr(SPRN_DBCR2, thread->dbcr2);
+	mtspr(SPRN_DBCR2, thread->debug.dbcr2);
 #endif
 }
 /*
@@ -373,8 +373,8 @@ static void prime_debug_regs(struct thread_struct *thread)
  */
 static void switch_booke_debug_regs(struct thread_struct *new_thread)
 {
-	if ((current->thread.dbcr0 & DBCR0_IDM)
-		|| (new_thread->dbcr0 & DBCR0_IDM))
+	if ((current->thread.debug.dbcr0 & DBCR0_IDM)
+		|| (new_thread->debug.dbcr0 & DBCR0_IDM))
 			prime_debug_regs(new_thread);
 }
 #else	/* !CONFIG_PPC_ADV_DEBUG_REGS */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 9a0d24c390a3..ddaf1780879c 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -854,8 +854,8 @@ void user_enable_single_step(struct task_struct *task)
 
 	if (regs != NULL) {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-		task->thread.dbcr0 &= ~DBCR0_BT;
-		task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+		task->thread.debug.dbcr0 &= ~DBCR0_BT;
+		task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
 		regs->msr |= MSR_DE;
 #else
 		regs->msr &= ~MSR_BE;
@@ -871,8 +871,8 @@ void user_enable_block_step(struct task_struct *task)
 
 	if (regs != NULL) {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-		task->thread.dbcr0 &= ~DBCR0_IC;
-		task->thread.dbcr0 = DBCR0_IDM | DBCR0_BT;
+		task->thread.debug.dbcr0 &= ~DBCR0_IC;
+		task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT;
 		regs->msr |= MSR_DE;
 #else
 		regs->msr &= ~MSR_SE;
@@ -894,16 +894,16 @@ void user_disable_single_step(struct task_struct *task)
 		 * And, after doing so, if all debug flags are off, turn
 		 * off DBCR0(IDM) and MSR(DE) .... Torez
 		 */
-		task->thread.dbcr0 &= ~DBCR0_IC;
+		task->thread.debug.dbcr0 &= ~DBCR0_IC;
 		/*
 		 * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set.
 		 */
-		if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0,
-					task->thread.dbcr1)) {
+		if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+					task->thread.debug.dbcr1)) {
 			/*
 			 * All debug events were off.....
 			 */
-			task->thread.dbcr0 &= ~DBCR0_IDM;
+			task->thread.debug.dbcr0 &= ~DBCR0_IDM;
 			regs->msr &= ~MSR_DE;
 		}
 #else
@@ -1022,14 +1022,14 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 	 */
 
 	/* DAC's hold the whole address without any mode flags */
-	task->thread.dac1 = data & ~0x3UL;
+	task->thread.debug.dac1 = data & ~0x3UL;
 
-	if (task->thread.dac1 == 0) {
+	if (task->thread.debug.dac1 == 0) {
 		dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
-		if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0,
-					task->thread.dbcr1)) {
+		if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+					task->thread.debug.dbcr1)) {
 			task->thread.regs->msr &= ~MSR_DE;
-			task->thread.dbcr0 &= ~DBCR0_IDM;
+			task->thread.debug.dbcr0 &= ~DBCR0_IDM;
 		}
 		return 0;
 	}
@@ -1041,7 +1041,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 
 	/* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
 	   register */
-	task->thread.dbcr0 |= DBCR0_IDM;
+	task->thread.debug.dbcr0 |= DBCR0_IDM;
 
 	/* Check for write and read flags and set DBCR0
 	   accordingly */
@@ -1071,10 +1071,10 @@ static long set_instruction_bp(struct task_struct *child,
 			      struct ppc_hw_breakpoint *bp_info)
 {
 	int slot;
-	int slot1_in_use = ((child->thread.dbcr0 & DBCR0_IAC1) != 0);
-	int slot2_in_use = ((child->thread.dbcr0 & DBCR0_IAC2) != 0);
-	int slot3_in_use = ((child->thread.dbcr0 & DBCR0_IAC3) != 0);
-	int slot4_in_use = ((child->thread.dbcr0 & DBCR0_IAC4) != 0);
+	int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0);
+	int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0);
+	int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0);
+	int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0);
 
 	if (dbcr_iac_range(child) & DBCR_IAC12MODE)
 		slot2_in_use = 1;
@@ -1093,9 +1093,9 @@ static long set_instruction_bp(struct task_struct *child,
 		/* We need a pair of IAC regsisters */
 		if ((!slot1_in_use) && (!slot2_in_use)) {
 			slot = 1;
-			child->thread.iac1 = bp_info->addr;
-			child->thread.iac2 = bp_info->addr2;
-			child->thread.dbcr0 |= DBCR0_IAC1;
+			child->thread.debug.iac1 = bp_info->addr;
+			child->thread.debug.iac2 = bp_info->addr2;
+			child->thread.debug.dbcr0 |= DBCR0_IAC1;
 			if (bp_info->addr_mode ==
 					PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
 				dbcr_iac_range(child) |= DBCR_IAC12X;
@@ -1104,9 +1104,9 @@ static long set_instruction_bp(struct task_struct *child,
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
 		} else if ((!slot3_in_use) && (!slot4_in_use)) {
 			slot = 3;
-			child->thread.iac3 = bp_info->addr;
-			child->thread.iac4 = bp_info->addr2;
-			child->thread.dbcr0 |= DBCR0_IAC3;
+			child->thread.debug.iac3 = bp_info->addr;
+			child->thread.debug.iac4 = bp_info->addr2;
+			child->thread.debug.dbcr0 |= DBCR0_IAC3;
 			if (bp_info->addr_mode ==
 					PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
 				dbcr_iac_range(child) |= DBCR_IAC34X;
@@ -1126,30 +1126,30 @@ static long set_instruction_bp(struct task_struct *child,
 			 */
 			if (slot2_in_use || (slot3_in_use == slot4_in_use)) {
 				slot = 1;
-				child->thread.iac1 = bp_info->addr;
-				child->thread.dbcr0 |= DBCR0_IAC1;
+				child->thread.debug.iac1 = bp_info->addr;
+				child->thread.debug.dbcr0 |= DBCR0_IAC1;
 				goto out;
 			}
 		}
 		if (!slot2_in_use) {
 			slot = 2;
-			child->thread.iac2 = bp_info->addr;
-			child->thread.dbcr0 |= DBCR0_IAC2;
+			child->thread.debug.iac2 = bp_info->addr;
+			child->thread.debug.dbcr0 |= DBCR0_IAC2;
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
 		} else if (!slot3_in_use) {
 			slot = 3;
-			child->thread.iac3 = bp_info->addr;
-			child->thread.dbcr0 |= DBCR0_IAC3;
+			child->thread.debug.iac3 = bp_info->addr;
+			child->thread.debug.dbcr0 |= DBCR0_IAC3;
 		} else if (!slot4_in_use) {
 			slot = 4;
-			child->thread.iac4 = bp_info->addr;
-			child->thread.dbcr0 |= DBCR0_IAC4;
+			child->thread.debug.iac4 = bp_info->addr;
+			child->thread.debug.dbcr0 |= DBCR0_IAC4;
 #endif
 		} else
 			return -ENOSPC;
 	}
 out:
-	child->thread.dbcr0 |= DBCR0_IDM;
+	child->thread.debug.dbcr0 |= DBCR0_IDM;
 	child->thread.regs->msr |= MSR_DE;
 
 	return slot;
@@ -1159,49 +1159,49 @@ static int del_instruction_bp(struct task_struct *child, int slot)
 {
 	switch (slot) {
 	case 1:
-		if ((child->thread.dbcr0 & DBCR0_IAC1) == 0)
+		if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0)
 			return -ENOENT;
 
 		if (dbcr_iac_range(child) & DBCR_IAC12MODE) {
 			/* address range - clear slots 1 & 2 */
-			child->thread.iac2 = 0;
+			child->thread.debug.iac2 = 0;
 			dbcr_iac_range(child) &= ~DBCR_IAC12MODE;
 		}
-		child->thread.iac1 = 0;
-		child->thread.dbcr0 &= ~DBCR0_IAC1;
+		child->thread.debug.iac1 = 0;
+		child->thread.debug.dbcr0 &= ~DBCR0_IAC1;
 		break;
 	case 2:
-		if ((child->thread.dbcr0 & DBCR0_IAC2) == 0)
+		if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0)
 			return -ENOENT;
 
 		if (dbcr_iac_range(child) & DBCR_IAC12MODE)
 			/* used in a range */
 			return -EINVAL;
-		child->thread.iac2 = 0;
-		child->thread.dbcr0 &= ~DBCR0_IAC2;
+		child->thread.debug.iac2 = 0;
+		child->thread.debug.dbcr0 &= ~DBCR0_IAC2;
 		break;
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
 	case 3:
-		if ((child->thread.dbcr0 & DBCR0_IAC3) == 0)
+		if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0)
 			return -ENOENT;
 
 		if (dbcr_iac_range(child) & DBCR_IAC34MODE) {
 			/* address range - clear slots 3 & 4 */
-			child->thread.iac4 = 0;
+			child->thread.debug.iac4 = 0;
 			dbcr_iac_range(child) &= ~DBCR_IAC34MODE;
 		}
-		child->thread.iac3 = 0;
-		child->thread.dbcr0 &= ~DBCR0_IAC3;
+		child->thread.debug.iac3 = 0;
+		child->thread.debug.dbcr0 &= ~DBCR0_IAC3;
 		break;
 	case 4:
-		if ((child->thread.dbcr0 & DBCR0_IAC4) == 0)
+		if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0)
 			return -ENOENT;
 
 		if (dbcr_iac_range(child) & DBCR_IAC34MODE)
 			/* Used in a range */
 			return -EINVAL;
-		child->thread.iac4 = 0;
-		child->thread.dbcr0 &= ~DBCR0_IAC4;
+		child->thread.debug.iac4 = 0;
+		child->thread.debug.dbcr0 &= ~DBCR0_IAC4;
 		break;
 #endif
 	default:
@@ -1231,18 +1231,18 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
 			dbcr_dac(child) |= DBCR_DAC1R;
 		if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
 			dbcr_dac(child) |= DBCR_DAC1W;
-		child->thread.dac1 = (unsigned long)bp_info->addr;
+		child->thread.debug.dac1 = (unsigned long)bp_info->addr;
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
 		if (byte_enable) {
-			child->thread.dvc1 =
+			child->thread.debug.dvc1 =
 				(unsigned long)bp_info->condition_value;
-			child->thread.dbcr2 |=
+			child->thread.debug.dbcr2 |=
 				((byte_enable << DBCR2_DVC1BE_SHIFT) |
 				 (condition_mode << DBCR2_DVC1M_SHIFT));
 		}
 #endif
 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-	} else if (child->thread.dbcr2 & DBCR2_DAC12MODE) {
+	} else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
 		/* Both dac1 and dac2 are part of a range */
 		return -ENOSPC;
 #endif
@@ -1252,19 +1252,19 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
 			dbcr_dac(child) |= DBCR_DAC2R;
 		if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
 			dbcr_dac(child) |= DBCR_DAC2W;
-		child->thread.dac2 = (unsigned long)bp_info->addr;
+		child->thread.debug.dac2 = (unsigned long)bp_info->addr;
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
 		if (byte_enable) {
-			child->thread.dvc2 =
+			child->thread.debug.dvc2 =
 				(unsigned long)bp_info->condition_value;
-			child->thread.dbcr2 |=
+			child->thread.debug.dbcr2 |=
 				((byte_enable << DBCR2_DVC2BE_SHIFT) |
 				 (condition_mode << DBCR2_DVC2M_SHIFT));
 		}
 #endif
 	} else
 		return -ENOSPC;
-	child->thread.dbcr0 |= DBCR0_IDM;
+	child->thread.debug.dbcr0 |= DBCR0_IDM;
 	child->thread.regs->msr |= MSR_DE;
 
 	return slot + 4;
@@ -1276,32 +1276,32 @@ static int del_dac(struct task_struct *child, int slot)
 		if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0)
 			return -ENOENT;
 
-		child->thread.dac1 = 0;
+		child->thread.debug.dac1 = 0;
 		dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W);
 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-		if (child->thread.dbcr2 & DBCR2_DAC12MODE) {
-			child->thread.dac2 = 0;
-			child->thread.dbcr2 &= ~DBCR2_DAC12MODE;
+		if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
+			child->thread.debug.dac2 = 0;
+			child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
 		}
-		child->thread.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
+		child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
 #endif
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
-		child->thread.dvc1 = 0;
+		child->thread.debug.dvc1 = 0;
 #endif
 	} else if (slot == 2) {
 		if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0)
 			return -ENOENT;
 
 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-		if (child->thread.dbcr2 & DBCR2_DAC12MODE)
+		if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE)
 			/* Part of a range */
 			return -EINVAL;
-		child->thread.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
+		child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
 #endif
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
-		child->thread.dvc2 = 0;
+		child->thread.debug.dvc2 = 0;
 #endif
-		child->thread.dac2 = 0;
+		child->thread.debug.dac2 = 0;
 		dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W);
 	} else
 		return -EINVAL;
@@ -1343,22 +1343,22 @@ static int set_dac_range(struct task_struct *child,
 			return -EIO;
 	}
 
-	if (child->thread.dbcr0 &
+	if (child->thread.debug.dbcr0 &
 	    (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W))
 		return -ENOSPC;
 
 	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
-		child->thread.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
+		child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
 	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
-		child->thread.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
-	child->thread.dac1 = bp_info->addr;
-	child->thread.dac2 = bp_info->addr2;
+		child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
+	child->thread.debug.dac1 = bp_info->addr;
+	child->thread.debug.dac2 = bp_info->addr2;
 	if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
-		child->thread.dbcr2  |= DBCR2_DAC12M;
+		child->thread.debug.dbcr2  |= DBCR2_DAC12M;
 	else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
-		child->thread.dbcr2  |= DBCR2_DAC12MX;
+		child->thread.debug.dbcr2  |= DBCR2_DAC12MX;
 	else	/* PPC_BREAKPOINT_MODE_MASK */
-		child->thread.dbcr2  |= DBCR2_DAC12MM;
+		child->thread.debug.dbcr2  |= DBCR2_DAC12MM;
 	child->thread.regs->msr |= MSR_DE;
 
 	return 5;
@@ -1489,9 +1489,9 @@ static long ppc_del_hwdebug(struct task_struct *child, long data)
 		rc = del_dac(child, (int)data - 4);
 
 	if (!rc) {
-		if (!DBCR_ACTIVE_EVENTS(child->thread.dbcr0,
-					child->thread.dbcr1)) {
-			child->thread.dbcr0 &= ~DBCR0_IDM;
+		if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0,
+					child->thread.debug.dbcr1)) {
+			child->thread.debug.dbcr0 &= ~DBCR0_IDM;
 			child->thread.regs->msr &= ~MSR_DE;
 		}
 	}
@@ -1669,7 +1669,7 @@ long arch_ptrace(struct task_struct *child, long request,
 		if (addr > 0)
 			break;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-		ret = put_user(child->thread.dac1, datalp);
+		ret = put_user(child->thread.debug.dac1, datalp);
 #else
 		dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |
 			     (child->thread.hw_brk.type & HW_BRK_TYPE_DABR));
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
index f51599e941c7..18c7c65ea46d 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -269,7 +269,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 		if (addr > 0)
 			break;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-		ret = put_user(child->thread.dac1, (u32 __user *)data);
+		ret = put_user(child->thread.debug.dac1, (u32 __user *)data);
 #else
 		dabr_fake = (
 			(child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index bebdf1a1a540..3f220d93c72f 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1309,7 +1309,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
 	unsigned char tmp;
 	unsigned long new_msr = regs->msr;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-	unsigned long new_dbcr0 = current->thread.dbcr0;
+	unsigned long new_dbcr0 = current->thread.debug.dbcr0;
 #endif
 
 	for (i=0; i<ndbg; i++) {
@@ -1324,7 +1324,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
 			} else {
 				new_dbcr0 &= ~DBCR0_IC;
 				if (!DBCR_ACTIVE_EVENTS(new_dbcr0,
-						current->thread.dbcr1)) {
+						current->thread.debug.dbcr1)) {
 					new_msr &= ~MSR_DE;
 					new_dbcr0 &= ~DBCR0_IDM;
 				}
@@ -1359,7 +1359,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
 	   the user is really doing something wrong. */
 	regs->msr = new_msr;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-	current->thread.dbcr0 = new_dbcr0;
+	current->thread.debug.dbcr0 = new_dbcr0;
 #endif
 
 	if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx))
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index f783c932faeb..4f5df4e7df12 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -351,8 +351,8 @@ static inline int check_io_access(struct pt_regs *regs)
 #define REASON_TRAP		ESR_PTR
 
 /* single-step stuff */
-#define single_stepping(regs)	(current->thread.dbcr0 & DBCR0_IC)
-#define clear_single_step(regs)	(current->thread.dbcr0 &= ~DBCR0_IC)
+#define single_stepping(regs)	(current->thread.debug.dbcr0 & DBCR0_IC)
+#define clear_single_step(regs)	(current->thread.debug.dbcr0 &= ~DBCR0_IC)
 
 #else
 /* On non-4xx, the reason for the machine check or program
@@ -1486,7 +1486,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
 	if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
 		dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W);
 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-		current->thread.dbcr2 &= ~DBCR2_DAC12MODE;
+		current->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
 #endif
 		do_send_trap(regs, mfspr(SPRN_DAC1), debug_status, TRAP_HWBKPT,
 			     5);
@@ -1497,24 +1497,24 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
 			     6);
 		changed |= 0x01;
 	}  else if (debug_status & DBSR_IAC1) {
-		current->thread.dbcr0 &= ~DBCR0_IAC1;
+		current->thread.debug.dbcr0 &= ~DBCR0_IAC1;
 		dbcr_iac_range(current) &= ~DBCR_IAC12MODE;
 		do_send_trap(regs, mfspr(SPRN_IAC1), debug_status, TRAP_HWBKPT,
 			     1);
 		changed |= 0x01;
 	}  else if (debug_status & DBSR_IAC2) {
-		current->thread.dbcr0 &= ~DBCR0_IAC2;
+		current->thread.debug.dbcr0 &= ~DBCR0_IAC2;
 		do_send_trap(regs, mfspr(SPRN_IAC2), debug_status, TRAP_HWBKPT,
 			     2);
 		changed |= 0x01;
 	}  else if (debug_status & DBSR_IAC3) {
-		current->thread.dbcr0 &= ~DBCR0_IAC3;
+		current->thread.debug.dbcr0 &= ~DBCR0_IAC3;
 		dbcr_iac_range(current) &= ~DBCR_IAC34MODE;
 		do_send_trap(regs, mfspr(SPRN_IAC3), debug_status, TRAP_HWBKPT,
 			     3);
 		changed |= 0x01;
 	}  else if (debug_status & DBSR_IAC4) {
-		current->thread.dbcr0 &= ~DBCR0_IAC4;
+		current->thread.debug.dbcr0 &= ~DBCR0_IAC4;
 		do_send_trap(regs, mfspr(SPRN_IAC4), debug_status, TRAP_HWBKPT,
 			     4);
 		changed |= 0x01;
@@ -1524,19 +1524,20 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
 	 * Check all other debug flags and see if that bit needs to be turned
 	 * back on or not.
 	 */
-	if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, current->thread.dbcr1))
+	if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
+			       current->thread.debug.dbcr1))
 		regs->msr |= MSR_DE;
 	else
 		/* Make sure the IDM flag is off */
-		current->thread.dbcr0 &= ~DBCR0_IDM;
+		current->thread.debug.dbcr0 &= ~DBCR0_IDM;
 
 	if (changed & 0x01)
-		mtspr(SPRN_DBCR0, current->thread.dbcr0);
+		mtspr(SPRN_DBCR0, current->thread.debug.dbcr0);
 }
 
 void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
 {
-	current->thread.dbsr = debug_status;
+	current->thread.debug.dbsr = debug_status;
 
 	/* Hack alert: On BookE, Branch Taken stops on the branch itself, while
 	 * on server, it stops on the target of the branch. In order to simulate
@@ -1553,8 +1554,8 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
 
 		/* Do the single step trick only when coming from userspace */
 		if (user_mode(regs)) {
-			current->thread.dbcr0 &= ~DBCR0_BT;
-			current->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+			current->thread.debug.dbcr0 &= ~DBCR0_BT;
+			current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
 			regs->msr |= MSR_DE;
 			return;
 		}
@@ -1582,13 +1583,13 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
 			return;
 
 		if (user_mode(regs)) {
-			current->thread.dbcr0 &= ~DBCR0_IC;
-			if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0,
-					       current->thread.dbcr1))
+			current->thread.debug.dbcr0 &= ~DBCR0_IC;
+			if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
+					       current->thread.debug.dbcr1))
 				regs->msr |= MSR_DE;
 			else
 				/* Make sure the IDM bit is off */
-				current->thread.dbcr0 &= ~DBCR0_IDM;
+				current->thread.debug.dbcr0 &= ~DBCR0_IDM;
 		}
 
 		_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
-- 
cgit v1.2.3


From fc82cf113bded906e0fc1d4a8636a6776670b1c5 Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <r65777@freescale.com>
Date: Thu, 4 Jul 2013 12:27:44 +0530
Subject: powerpc: export debug registers save function for KVM

KVM needs this function when switching from a vcpu to a user-space
thread. A subsequent patch will use this function.

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
Acked-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/switch_to.h | 1 +
 arch/powerpc/kernel/process.c        | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 2be5618cdec6..9ee12610af02 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -35,6 +35,7 @@ extern void giveup_vsx(struct task_struct *);
 extern void enable_kernel_spe(void);
 extern void giveup_spe(struct task_struct *);
 extern void load_up_spe(struct task_struct *);
+extern void switch_booke_debug_regs(struct thread_struct *new_thread);
 
 #ifndef CONFIG_SMP
 extern void discard_lazy_cpu_state(void);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a2b9231cfd48..c24a90667675 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -371,12 +371,13 @@ static void prime_debug_regs(struct thread_struct *thread)
  * debug registers, set the debug registers from the values
  * stored in the new thread.
  */
-static void switch_booke_debug_regs(struct thread_struct *new_thread)
+void switch_booke_debug_regs(struct thread_struct *new_thread)
 {
 	if ((current->thread.debug.dbcr0 & DBCR0_IDM)
 		|| (new_thread->debug.dbcr0 & DBCR0_IDM))
 			prime_debug_regs(new_thread);
 }
+EXPORT_SYMBOL_GPL(switch_booke_debug_regs);
 #else	/* !CONFIG_PPC_ADV_DEBUG_REGS */
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
 static void set_debug_reg_defaults(struct thread_struct *thread)
-- 
cgit v1.2.3


From b12c784123ff805c066449ca580f15a903c24994 Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <r65777@freescale.com>
Date: Thu, 4 Jul 2013 12:27:45 +0530
Subject: KVM: PPC: E500: exit to user space on "ehpriv 1" instruction

"ehpriv 1" instruction is used for setting software breakpoints
by user space. This patch adds support for exiting to user space
with "run->debug" holding the relevant information.

As this is the first place where run->debug is used, this patch also
defines the run->debug structure.

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/disassemble.h |  4 ++++
 arch/powerpc/include/asm/kvm_booke.h   |  7 ++++++-
 arch/powerpc/include/uapi/asm/kvm.h    | 21 +++++++++++++++++----
 arch/powerpc/kvm/booke.c               |  2 +-
 arch/powerpc/kvm/e500_emulate.c        | 26 ++++++++++++++++++++++++++
 5 files changed, 54 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
index 9b198d1b3b2b..856f8deb557a 100644
--- a/arch/powerpc/include/asm/disassemble.h
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -77,4 +77,8 @@ static inline unsigned int get_d(u32 inst)
 	return inst & 0xffff;
 }
 
+static inline unsigned int get_oc(u32 inst)
+{
+	return (inst >> 11) & 0x7fff;
+}
 #endif /* __ASM_PPC_DISASSEMBLE_H__ */
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index d3c1eb34c986..dd8f61510dfd 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -26,7 +26,12 @@
 /* LPIDs we support with this build -- runtime limit may be lower */
 #define KVMPPC_NR_LPIDS                        64
 
-#define KVMPPC_INST_EHPRIV	0x7c00021c
+#define KVMPPC_INST_EHPRIV		0x7c00021c
+#define EHPRIV_OC_SHIFT			11
+/* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */
+#define EHPRIV_OC_DEBUG			1
+#define KVMPPC_INST_EHPRIV_DEBUG	(KVMPPC_INST_EHPRIV | \
+					 (EHPRIV_OC_DEBUG << EHPRIV_OC_SHIFT))
 
 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
 {
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index e420d46d363f..482bba56e499 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -269,7 +269,24 @@ struct kvm_fpu {
 	__u64 fpr[32];
 };
 
+/*
+ * Defines for h/w breakpoint, watchpoint (read, write or both) and
+ * software breakpoint.
+ * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
+ * for KVM_DEBUG_EXIT.
+ */
+#define KVMPPC_DEBUG_NONE		0x0
+#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
+#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
+#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
 struct kvm_debug_exit_arch {
+	__u64 address;
+	/*
+	 * exiting to userspace because of h/w breakpoint, watchpoint
+	 * (read, write or both) and software breakpoint.
+	 */
+	__u32 status;
+	__u32 reserved;
 };
 
 /* for KVM_SET_GUEST_DEBUG */
@@ -281,10 +298,6 @@ struct kvm_guest_debug_arch {
 		 * Type denotes h/w breakpoint, read watchpoint, write
 		 * watchpoint or watchpoint (both read and write).
 		 */
-#define KVMPPC_DEBUG_NONE		0x0
-#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
-#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
-#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
 		__u32 type;
 		__u32 reserved;
 	} bp[16];
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 65fa7754b194..1d6edf00e0e5 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1477,7 +1477,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 		val = get_reg_val(reg->id, vcpu->arch.tsr);
 		break;
 	case KVM_REG_PPC_DEBUG_INST:
-		val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV);
+		val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG);
 		break;
 	case KVM_REG_PPC_VRSAVE:
 		val = get_reg_val(reg->id, vcpu->arch.vrsave);
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index b10a01243abd..6163a0318d2a 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -26,6 +26,7 @@
 #define XOP_TLBRE   946
 #define XOP_TLBWE   978
 #define XOP_TLBILX  18
+#define XOP_EHPRIV  270
 
 #ifdef CONFIG_KVM_E500MC
 static int dbell2prio(ulong param)
@@ -82,6 +83,26 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
 }
 #endif
 
+static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+				   unsigned int inst, int *advance)
+{
+	int emulated = EMULATE_DONE;
+
+	switch (get_oc(inst)) {
+	case EHPRIV_OC_DEBUG:
+		run->exit_reason = KVM_EXIT_DEBUG;
+		run->debug.arch.address = vcpu->arch.pc;
+		run->debug.arch.status = 0;
+		kvmppc_account_exit(vcpu, DEBUG_EXITS);
+		emulated = EMULATE_EXIT_USER;
+		*advance = 0;
+		break;
+	default:
+		emulated = EMULATE_FAIL;
+	}
+	return emulated;
+}
+
 int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                            unsigned int inst, int *advance)
 {
@@ -130,6 +151,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
 			break;
 
+		case XOP_EHPRIV:
+			emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst,
+							   advance);
+			break;
+
 		default:
 			emulated = EMULATE_FAIL;
 		}
-- 
cgit v1.2.3


From 547465ef8bcad77a3a73dad5151d9d28a0c1b88d Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <r65777@freescale.com>
Date: Thu, 4 Jul 2013 12:27:46 +0530
Subject: KVM: PPC: E500: Using "struct debug_reg"

For KVM also use the "struct debug_reg" defined in asm/processor.h

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_host.h | 13 +------------
 arch/powerpc/kvm/booke.c            | 34 ++++++++++++++++++++++++----------
 2 files changed, 25 insertions(+), 22 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index b6881917cd84..7e83c5ff830a 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -387,17 +387,6 @@ struct kvmppc_slb {
 #define KVMPPC_EPR_USER		1 /* exit to userspace to fill EPR */
 #define KVMPPC_EPR_KERNEL	2 /* in-kernel irqchip */
 
-struct kvmppc_booke_debug_reg {
-	u32 dbcr0;
-	u32 dbcr1;
-	u32 dbcr2;
-#ifdef CONFIG_KVM_E500MC
-	u32 dbcr4;
-#endif
-	u64 iac[KVMPPC_BOOKE_MAX_IAC];
-	u64 dac[KVMPPC_BOOKE_MAX_DAC];
-};
-
 #define KVMPPC_IRQ_DEFAULT	0
 #define KVMPPC_IRQ_MPIC		1
 #define KVMPPC_IRQ_XICS		2
@@ -549,7 +538,7 @@ struct kvm_vcpu_arch {
 	u32 eptcfg;
 	u32 epr;
 	u32 crit_save;
-	struct kvmppc_booke_debug_reg dbg_reg;
+	struct debug_reg dbg_reg;
 #endif
 	gpa_t paddr_accessed;
 	gva_t vaddr_accessed;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1d6edf00e0e5..a972fb600a99 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1441,7 +1441,6 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 	int r = 0;
 	union kvmppc_one_reg val;
 	int size;
-	long int i;
 
 	size = one_reg_size(reg->id);
 	if (size > sizeof(val))
@@ -1449,16 +1448,24 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 
 	switch (reg->id) {
 	case KVM_REG_PPC_IAC1:
+		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1);
+		break;
 	case KVM_REG_PPC_IAC2:
+		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2);
+		break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
 	case KVM_REG_PPC_IAC3:
+		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3);
+		break;
 	case KVM_REG_PPC_IAC4:
-		i = reg->id - KVM_REG_PPC_IAC1;
-		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]);
+		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4);
 		break;
+#endif
 	case KVM_REG_PPC_DAC1:
+		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1);
+		break;
 	case KVM_REG_PPC_DAC2:
-		i = reg->id - KVM_REG_PPC_DAC1;
-		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]);
+		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2);
 		break;
 	case KVM_REG_PPC_EPR: {
 		u32 epr = get_guest_epr(vcpu);
@@ -1501,7 +1508,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 	int r = 0;
 	union kvmppc_one_reg val;
 	int size;
-	long int i;
 
 	size = one_reg_size(reg->id);
 	if (size > sizeof(val))
@@ -1512,16 +1518,24 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 
 	switch (reg->id) {
 	case KVM_REG_PPC_IAC1:
+		vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val);
+		break;
 	case KVM_REG_PPC_IAC2:
+		vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val);
+		break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
 	case KVM_REG_PPC_IAC3:
+		vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val);
+		break;
 	case KVM_REG_PPC_IAC4:
-		i = reg->id - KVM_REG_PPC_IAC1;
-		vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val);
+		vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val);
 		break;
+#endif
 	case KVM_REG_PPC_DAC1:
+		vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val);
+		break;
 	case KVM_REG_PPC_DAC2:
-		i = reg->id - KVM_REG_PPC_DAC1;
-		vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val);
+		vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val);
 		break;
 	case KVM_REG_PPC_EPR: {
 		u32 new_epr = set_reg_val(reg->id, val);
-- 
cgit v1.2.3


From ce11e48b7fdd256ec68b932a89b397a790566031 Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <r65777@freescale.com>
Date: Thu, 4 Jul 2013 12:27:47 +0530
Subject: KVM: PPC: E500: Add userspace debug stub support

This patch adds debug stub support on booke/bookehv.
The QEMU debug stub can now use hardware breakpoints, watchpoints and
software breakpoints to debug the guest.

This is how we save/restore debug register context when switching
between guest, userspace and kernel user-process:

When QEMU is running
 -> thread->debug_reg == QEMU debug register context.
 -> Kernel will handle switching the debug register on context switch.
 -> no vcpu_load() called

QEMU makes ioctls (except RUN)
 -> This will call vcpu_load()
 -> should not change context.
 -> Some ioctls can change vcpu debug register, context saved in vcpu->debug_regs

QEMU Makes RUN ioctl
 -> Save thread->debug_reg on STACK
 -> Store thread->debug_reg == vcpu->debug_reg
 -> load thread->debug_reg
 -> RUN VCPU ( So thread points to vcpu context )

Context switch happens When VCPU running
 -> makes vcpu_load() should not load any context
 -> kernel loads the vcpu context as thread->debug_regs points to vcpu context.

On heavyweight_exit
 -> Load the context saved on stack in thread->debug_reg

Currently we do not support debug resource emulation for the guest.
On a debug exception, always exit to user space, irrespective of
whether user space is expecting the debug exception or not. If this
is an unexpected exception (breakpoint/watchpoint event not set by
userspace), then leave the action to user space. This is similar to
the previous behavior; the only difference is that the proper exit
state is now available to user space.

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_host.h |   3 +
 arch/powerpc/include/uapi/asm/kvm.h |   1 +
 arch/powerpc/kvm/booke.c            | 240 +++++++++++++++++++++++++++++++++---
 arch/powerpc/kvm/booke.h            |   5 +
 4 files changed, 231 insertions(+), 18 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 7e83c5ff830a..4959ff1b5376 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -538,7 +538,10 @@ struct kvm_vcpu_arch {
 	u32 eptcfg;
 	u32 epr;
 	u32 crit_save;
+	/* guest debug registers*/
 	struct debug_reg dbg_reg;
+	/* hardware visible debug registers when in guest state */
+	struct debug_reg shadow_dbg_reg;
 #endif
 	gpa_t paddr_accessed;
 	gva_t vaddr_accessed;
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 482bba56e499..6836ec79a830 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -27,6 +27,7 @@
 #define __KVM_HAVE_PPC_SMT
 #define __KVM_HAVE_IRQCHIP
 #define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_GUEST_DEBUG
 
 struct kvm_regs {
 	__u64 pc;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index a972fb600a99..8b6a790c0562 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -133,6 +133,29 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
 #endif
 }
 
+static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
+{
+	/* Synchronize guest's desire to get debug interrupts into shadow MSR */
+#ifndef CONFIG_KVM_BOOKE_HV
+	vcpu->arch.shadow_msr &= ~MSR_DE;
+	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE;
+#endif
+
+	/* Force enable debug interrupts when user space wants to debug */
+	if (vcpu->guest_debug) {
+#ifdef CONFIG_KVM_BOOKE_HV
+		/*
+		 * Since there is no shadow MSR, sync MSR_DE into the guest
+		 * visible MSR.
+		 */
+		vcpu->arch.shared->msr |= MSR_DE;
+#else
+		vcpu->arch.shadow_msr |= MSR_DE;
+		vcpu->arch.shared->msr &= ~MSR_DE;
+#endif
+	}
+}
+
 /*
  * Helper function for "full" MSR writes.  No need to call this if only
  * EE/CE/ME/DE/RI are changing.
@@ -150,6 +173,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
 	kvmppc_mmu_msr_notify(vcpu, old_msr);
 	kvmppc_vcpu_sync_spe(vcpu);
 	kvmppc_vcpu_sync_fpu(vcpu);
+	kvmppc_vcpu_sync_debug(vcpu);
 }
 
 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
@@ -655,6 +679,7 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	int ret, s;
+	struct thread_struct thread;
 #ifdef CONFIG_PPC_FPU
 	unsigned int fpscr;
 	int fpexc_mode;
@@ -696,6 +721,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	kvmppc_load_guest_fp(vcpu);
 #endif
 
+	/* Switch to guest debug context */
+	thread.debug = vcpu->arch.shadow_dbg_reg;
+	switch_booke_debug_regs(&thread);
+	thread.debug = current->thread.debug;
+	current->thread.debug = vcpu->arch.shadow_dbg_reg;
+
 	kvmppc_fix_ee_before_entry();
 
 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
@@ -703,6 +734,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	/* No need for kvm_guest_exit. It's done in handle_exit.
 	   We also get here with interrupts enabled. */
 
+	/* Switch back to user space debug context */
+	switch_booke_debug_regs(&thread);
+	current->thread.debug = thread.debug;
+
 #ifdef CONFIG_PPC_FPU
 	kvmppc_save_guest_fp(vcpu);
 
@@ -758,6 +793,30 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	}
 }
 
+static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+	u32 dbsr = vcpu->arch.dbsr;
+
+	run->debug.arch.status = 0;
+	run->debug.arch.address = vcpu->arch.pc;
+
+	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
+		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
+	} else {
+		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
+			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
+		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
+			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
+		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
+			run->debug.arch.address = dbg_reg->dac1;
+		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
+			run->debug.arch.address = dbg_reg->dac2;
+	}
+
+	return RESUME_HOST;
+}
+
 static void kvmppc_fill_pt_regs(struct pt_regs *regs)
 {
 	ulong r1, ip, msr, lr;
@@ -818,6 +877,11 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
 	case BOOKE_INTERRUPT_CRITICAL:
 		unknown_exception(&regs);
 		break;
+	case BOOKE_INTERRUPT_DEBUG:
+		/* Save DBSR before preemption is enabled */
+		vcpu->arch.dbsr = mfspr(SPRN_DBSR);
+		kvmppc_clear_dbsr();
+		break;
 	}
 }
 
@@ -1135,18 +1199,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	}
 
 	case BOOKE_INTERRUPT_DEBUG: {
-		u32 dbsr;
-
-		vcpu->arch.pc = mfspr(SPRN_CSRR0);
-
-		/* clear IAC events in DBSR register */
-		dbsr = mfspr(SPRN_DBSR);
-		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
-		mtspr(SPRN_DBSR, dbsr);
-
-		run->exit_reason = KVM_EXIT_DEBUG;
+		r = kvmppc_handle_debug(run, vcpu);
+		if (r == RESUME_HOST)
+			run->exit_reason = KVM_EXIT_DEBUG;
 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
-		r = RESUME_HOST;
 		break;
 	}
 
@@ -1197,7 +1253,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	kvmppc_set_msr(vcpu, 0);
 
 #ifndef CONFIG_KVM_BOOKE_HV
-	vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
+	vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
 	vcpu->arch.shadow_pid = 1;
 	vcpu->arch.shared->msr = 0;
 #endif
@@ -1580,12 +1636,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 	return r;
 }
 
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-					 struct kvm_guest_debug *dbg)
-{
-	return -EINVAL;
-}
-
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	return -ENOTSUPP;
@@ -1691,6 +1741,157 @@ void kvmppc_decrementer_func(unsigned long data)
 	kvmppc_set_tsr_bits(vcpu, TSR_DIS);
 }
 
+static int kvmppc_booke_add_breakpoint(struct debug_reg *dbg_reg,
+				       uint64_t addr, int index)
+{
+	switch (index) {
+	case 0:
+		dbg_reg->dbcr0 |= DBCR0_IAC1;
+		dbg_reg->iac1 = addr;
+		break;
+	case 1:
+		dbg_reg->dbcr0 |= DBCR0_IAC2;
+		dbg_reg->iac2 = addr;
+		break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	case 2:
+		dbg_reg->dbcr0 |= DBCR0_IAC3;
+		dbg_reg->iac3 = addr;
+		break;
+	case 3:
+		dbg_reg->dbcr0 |= DBCR0_IAC4;
+		dbg_reg->iac4 = addr;
+		break;
+#endif
+	default:
+		return -EINVAL;
+	}
+
+	dbg_reg->dbcr0 |= DBCR0_IDM;
+	return 0;
+}
+
+static int kvmppc_booke_add_watchpoint(struct debug_reg *dbg_reg, uint64_t addr,
+				       int type, int index)
+{
+	switch (index) {
+	case 0:
+		if (type & KVMPPC_DEBUG_WATCH_READ)
+			dbg_reg->dbcr0 |= DBCR0_DAC1R;
+		if (type & KVMPPC_DEBUG_WATCH_WRITE)
+			dbg_reg->dbcr0 |= DBCR0_DAC1W;
+		dbg_reg->dac1 = addr;
+		break;
+	case 1:
+		if (type & KVMPPC_DEBUG_WATCH_READ)
+			dbg_reg->dbcr0 |= DBCR0_DAC2R;
+		if (type & KVMPPC_DEBUG_WATCH_WRITE)
+			dbg_reg->dbcr0 |= DBCR0_DAC2W;
+		dbg_reg->dac2 = addr;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	dbg_reg->dbcr0 |= DBCR0_IDM;
+	return 0;
+}
+void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set)
+{
+	/* XXX: Add similar MSR protection for BookE-PR */
+#ifdef CONFIG_KVM_BOOKE_HV
+	BUG_ON(prot_bitmap & ~(MSRP_UCLEP | MSRP_DEP | MSRP_PMMP));
+	if (set) {
+		if (prot_bitmap & MSR_UCLE)
+			vcpu->arch.shadow_msrp |= MSRP_UCLEP;
+		if (prot_bitmap & MSR_DE)
+			vcpu->arch.shadow_msrp |= MSRP_DEP;
+		if (prot_bitmap & MSR_PMM)
+			vcpu->arch.shadow_msrp |= MSRP_PMMP;
+	} else {
+		if (prot_bitmap & MSR_UCLE)
+			vcpu->arch.shadow_msrp &= ~MSRP_UCLEP;
+		if (prot_bitmap & MSR_DE)
+			vcpu->arch.shadow_msrp &= ~MSRP_DEP;
+		if (prot_bitmap & MSR_PMM)
+			vcpu->arch.shadow_msrp &= ~MSRP_PMMP;
+	}
+#endif
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					 struct kvm_guest_debug *dbg)
+{
+	struct debug_reg *dbg_reg;
+	int n, b = 0, w = 0;
+
+	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
+		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+		vcpu->guest_debug = 0;
+		kvm_guest_protect_msr(vcpu, MSR_DE, false);
+		return 0;
+	}
+
+	kvm_guest_protect_msr(vcpu, MSR_DE, true);
+	vcpu->guest_debug = dbg->control;
+	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+	/* Set DBCR0_EDM in guest visible DBCR0 register. */
+	vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+
+	/* Code below handles only HW breakpoints */
+	dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+
+#ifdef CONFIG_KVM_BOOKE_HV
+	/*
+	 * On BookE-HV (e500mc) the guest is always executed with MSR.GS=1
+	 * DBCR1 and DBCR2 are set to trigger debug events when MSR.PR is 0
+	 */
+	dbg_reg->dbcr1 = 0;
+	dbg_reg->dbcr2 = 0;
+#else
+	/*
+	 * On BookE-PR (e500v2) the guest is always executed with MSR.PR=1
+	 * We set DBCR1 and DBCR2 to only trigger debug events when MSR.PR
+	 * is set.
+	 */
+	dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
+			  DBCR1_IAC4US;
+	dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+#endif
+
+	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+		return 0;
+
+	for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
+		uint64_t addr = dbg->arch.bp[n].addr;
+		uint32_t type = dbg->arch.bp[n].type;
+
+		if (type == KVMPPC_DEBUG_NONE)
+			continue;
+		/* Bitwise NOT: reject any type bit outside the supported set */
+		if (type & ~(KVMPPC_DEBUG_WATCH_READ |
+			     KVMPPC_DEBUG_WATCH_WRITE |
+			     KVMPPC_DEBUG_BREAKPOINT))
+			return -EINVAL;
+
+		if (type & KVMPPC_DEBUG_BREAKPOINT) {
+			/* Setting H/W breakpoint */
+			if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++))
+				return -EINVAL;
+		} else {
+			/* Setting H/W watchpoint */
+			if (kvmppc_booke_add_watchpoint(dbg_reg, addr,
+							type, w++))
+				return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = smp_processor_id();
@@ -1701,6 +1902,9 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	current->thread.kvm_vcpu = NULL;
 	vcpu->cpu = -1;
+
+	/* Clear pending debug event in DBSR */
+	kvmppc_clear_dbsr();
 }
 
 int __init kvmppc_booke_init(void)
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 5fd1ba693579..a1ff67d04022 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -129,4 +129,9 @@ static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
 		giveup_fpu(current);
 #endif
 }
+
+static inline void kvmppc_clear_dbsr(void)
+{
+	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
+}
 #endif /* __KVM_BOOKE_H__ */
-- 
cgit v1.2.3


From 7aa79938f7d76f5865d0b2a2d9bbe2337560261f Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 22:17:51 +0530
Subject: kvm: powerpc: book3s: pr: Rename KVM_BOOK3S_PR to
 KVM_BOOK3S_PR_POSSIBLE

With later patches supporting PR KVM as a kernel module, the changes
that have to be built into the main kernel binary to enable the PR KVM
module are now selected via KVM_BOOK3S_PR_POSSIBLE.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/exception-64s.h |  2 +-
 arch/powerpc/include/asm/kvm_book3s.h    |  4 ++--
 arch/powerpc/include/asm/kvm_book3s_64.h |  2 +-
 arch/powerpc/include/asm/kvm_host.h      |  2 +-
 arch/powerpc/include/asm/paca.h          |  2 +-
 arch/powerpc/kernel/asm-offsets.c        |  2 +-
 arch/powerpc/kernel/exceptions-64s.S     |  2 +-
 arch/powerpc/kvm/Kconfig                 |  6 +++---
 arch/powerpc/kvm/trace.h                 | 10 +++++-----
 9 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 402c1c466509..a22c9854a633 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -244,7 +244,7 @@ do_kvm_##n:								\
 #define KVM_HANDLER_SKIP(area, h, n)
 #endif
 
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 #define KVMTEST_PR(n)			__KVMTEST(n)
 #define KVM_HANDLER_PR(area, h, n)	__KVM_HANDLER(area, h, n)
 #define KVM_HANDLER_PR_SKIP(area, h, n)	__KVM_HANDLER_SKIP(area, h, n)
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 0ec00f4fef91..5c07d10e3c41 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -304,7 +304,7 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
 	return vcpu->arch.fault_dar;
 }
 
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 
 static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
 {
@@ -339,7 +339,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
 
 	return crit;
 }
-#else /* CONFIG_KVM_BOOK3S_PR */
+#else /* CONFIG_KVM_BOOK3S_PR_POSSIBLE */
 
 static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 86d638a3b359..e6ee7fd32d12 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -20,7 +20,7 @@
 #ifndef __ASM_KVM_BOOK3S_64_H__
 #define __ASM_KVM_BOOK3S_64_H__
 
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
 {
 	preempt_disable();
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 4959ff1b5376..9e9f689106e2 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -257,7 +257,7 @@ struct kvm_arch {
 	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
 	int hpt_cma_alloc;
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	struct mutex hpt_mutex;
 #endif
 #ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index a5954cebbc55..b6ea9e068c13 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -166,7 +166,7 @@ struct paca_struct {
 	struct dtl_entry *dtl_curr;	/* pointer corresponding to dtl_ridx */
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	/* We use this to store guest state in */
 	struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
 #endif
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 95ba8095fc4a..29796559c2fb 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -533,7 +533,7 @@ int main(void)
 	DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
 
 #ifdef CONFIG_PPC_BOOK3S_64
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	DEFINE(PACA_SVCPU, offsetof(struct paca_struct, shadow_vcpu));
 # define SVCPU_FIELD(x, f)	DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f))
 #else
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 2a273bed1ec8..3d1c42b03c89 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -425,7 +425,7 @@ data_access_check_stab:
 	mfspr	r9,SPRN_DSISR
 	srdi	r10,r10,60
 	rlwimi	r10,r9,16,0x20
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	lbz	r9,HSTATE_IN_GUEST(r13)
 	rlwimi	r10,r9,8,0x300
 #endif
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index ffaef2cb101a..d0665f231e6f 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -34,7 +34,7 @@ config KVM_BOOK3S_64_HANDLER
 	bool
 	select KVM_BOOK3S_HANDLER
 
-config KVM_BOOK3S_PR
+config KVM_BOOK3S_PR_POSSIBLE
 	bool
 	select KVM_MMIO
 	select MMU_NOTIFIER
@@ -44,7 +44,7 @@ config KVM_BOOK3S_32
 	depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT
 	select KVM
 	select KVM_BOOK3S_32_HANDLER
-	select KVM_BOOK3S_PR
+	select KVM_BOOK3S_PR_POSSIBLE
 	---help---
 	  Support running unmodified book3s_32 guest kernels
 	  in virtual machines on book3s_32 host processors.
@@ -91,7 +91,7 @@ config KVM_BOOK3S_64_HV
 config KVM_BOOK3S_64_PR
 	def_bool y
 	depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV
-	select KVM_BOOK3S_PR
+	select KVM_BOOK3S_PR_POSSIBLE
 
 config KVM_BOOKE_HV
 	bool
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index a088e9a8c103..9e8368edf371 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -94,7 +94,7 @@ TRACE_EVENT(kvm_exit,
 		__field(	unsigned long,	pc		)
 		__field(	unsigned long,	msr		)
 		__field(	unsigned long,	dar		)
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 		__field(	unsigned long,	srr1		)
 #endif
 		__field(	unsigned long,	last_inst	)
@@ -105,7 +105,7 @@ TRACE_EVENT(kvm_exit,
 		__entry->pc		= kvmppc_get_pc(vcpu);
 		__entry->dar		= kvmppc_get_fault_dar(vcpu);
 		__entry->msr		= vcpu->arch.shared->msr;
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 		__entry->srr1		= vcpu->arch.shadow_srr1;
 #endif
 		__entry->last_inst	= vcpu->arch.last_inst;
@@ -115,7 +115,7 @@ TRACE_EVENT(kvm_exit,
 		" | pc=0x%lx"
 		" | msr=0x%lx"
 		" | dar=0x%lx"
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 		" | srr1=0x%lx"
 #endif
 		" | last_inst=0x%lx"
@@ -124,7 +124,7 @@ TRACE_EVENT(kvm_exit,
 		__entry->pc,
 		__entry->msr,
 		__entry->dar,
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 		__entry->srr1,
 #endif
 		__entry->last_inst
@@ -236,7 +236,7 @@ TRACE_EVENT(kvm_check_requests,
  *                         Book3S trace points                           *
  *************************************************************************/
 
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 
 TRACE_EVENT(kvm_book3s_reenter,
 	TP_PROTO(int r, struct kvm_vcpu *vcpu),
-- 
cgit v1.2.3


From 9975f5e3692d320b4259a4d2edd8a979adb1e535 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 22:17:52 +0530
Subject: kvm: powerpc: book3s: Add a new config variable
 CONFIG_KVM_BOOK3S_HV_POSSIBLE

This helps us to select the relevant code in the kernel
when we later move the HV and PR bits into separate modules. The patch
also makes the config options for PR KVM selectable.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s.h     |  2 --
 arch/powerpc/include/asm/kvm_book3s_64.h  |  6 +++---
 arch/powerpc/include/asm/kvm_book3s_asm.h |  2 +-
 arch/powerpc/include/asm/kvm_host.h       | 10 +++++-----
 arch/powerpc/include/asm/kvm_ppc.h        |  2 +-
 arch/powerpc/kernel/asm-offsets.c         |  8 ++++----
 arch/powerpc/kernel/idle_power7.S         |  2 +-
 arch/powerpc/kvm/Kconfig                  | 18 +++++++++++++++++-
 arch/powerpc/kvm/Makefile                 | 12 ++++++++----
 arch/powerpc/kvm/book3s_exports.c         |  5 +++--
 10 files changed, 43 insertions(+), 24 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 5c07d10e3c41..99ef8711e906 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -199,8 +199,6 @@ static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
 	return vcpu->arch.book3s;
 }
 
-extern void kvm_return_point(void);
-
 /* Also add subarch specific defines */
 
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index e6ee7fd32d12..bf0fa8b0a883 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -35,7 +35,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
 
 #define SPAPR_TCE_SHIFT		12
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
 extern unsigned long kvm_rma_pages;
 #endif
@@ -278,7 +278,7 @@ static inline int is_vrma_hpte(unsigned long hpte_v)
 		(HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
 }
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 /*
  * Note modification of an HPTE; set the HPTE modified bit
  * if anyone is interested.
@@ -289,6 +289,6 @@ static inline void note_hpte_modification(struct kvm *kvm,
 	if (atomic_read(&kvm->arch.hpte_mod_interest))
 		rev->guest_rpte |= HPTE_GR_MODIFIED;
 }
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 62737113c2b9..0bd9348a4db9 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -83,7 +83,7 @@ struct kvmppc_host_state {
 	u8 restore_hid5;
 	u8 napping;
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	u8 hwthread_req;
 	u8 hwthread_state;
 	u8 host_ipi;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 9e9f689106e2..61ce4dca45d3 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -224,15 +224,15 @@ struct revmap_entry {
 #define KVMPPC_GOT_PAGE		0x80
 
 struct kvm_arch_memory_slot {
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	unsigned long *rmap;
 	unsigned long *slot_phys;
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 };
 
 struct kvm_arch {
 	unsigned int lpid;
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	unsigned long hpt_virt;
 	struct revmap_entry *revmap;
 	unsigned int host_lpid;
@@ -256,7 +256,7 @@ struct kvm_arch {
 	cpumask_t need_tlb_flush;
 	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
 	int hpt_cma_alloc;
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	struct mutex hpt_mutex;
 #endif
@@ -592,7 +592,7 @@ struct kvm_vcpu_arch {
 	struct kvmppc_icp *icp; /* XICS presentation controller */
 #endif
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	struct kvm_vcpu_arch_shared shregs;
 
 	unsigned long pgfault_addr;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index b15554a26c20..1823f38906c6 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -260,7 +260,7 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
 
 struct openpic;
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 extern void kvm_cma_reserve(void) __init;
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 29796559c2fb..1fbb2b63195c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -445,7 +445,7 @@ int main(void)
 	DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
 	DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
 	DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr));
 	DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0));
 	DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1));
@@ -476,7 +476,7 @@ int main(void)
 	DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
 
 	/* book3s */
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
 	DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
 	DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
@@ -586,7 +586,7 @@ int main(void)
 	HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
 	HSTATE_FIELD(HSTATE_NAPPING, napping);
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req);
 	HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
 	HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
@@ -602,7 +602,7 @@ int main(void)
 	HSTATE_FIELD(HSTATE_DABR, dabr);
 	HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
 	DEFINE(IPI_PRIORITY, IPI_PRIORITY);
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #ifdef CONFIG_PPC_BOOK3S_64
 	HSTATE_FIELD(HSTATE_CFAR, cfar);
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index e11863f4e595..847e40e62fce 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -84,7 +84,7 @@ _GLOBAL(power7_nap)
 	std	r9,_MSR(r1)
 	std	r1,PACAR1(r13)
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	/* Tell KVM we're napping */
 	li	r4,KVM_HWTHREAD_IN_NAP
 	stb	r4,HSTATE_HWTHREAD_STATE(r13)
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index d0665f231e6f..a96d7c32f785 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -39,6 +39,9 @@ config KVM_BOOK3S_PR_POSSIBLE
 	select KVM_MMIO
 	select MMU_NOTIFIER
 
+config KVM_BOOK3S_HV_POSSIBLE
+	bool
+
 config KVM_BOOK3S_32
 	tristate "KVM support for PowerPC book3s_32 processors"
 	depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT
@@ -59,6 +62,7 @@ config KVM_BOOK3S_64
 	depends on PPC_BOOK3S_64
 	select KVM_BOOK3S_64_HANDLER
 	select KVM
+	select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
 	---help---
 	  Support running unmodified book3s_64 and book3s_32 guest kernels
 	  in virtual machines on book3s_64 host processors.
@@ -71,6 +75,7 @@ config KVM_BOOK3S_64
 config KVM_BOOK3S_64_HV
 	bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
 	depends on KVM_BOOK3S_64
+	select KVM_BOOK3S_HV_POSSIBLE
 	select MMU_NOTIFIER
 	select CMA
 	---help---
@@ -89,9 +94,20 @@ config KVM_BOOK3S_64_HV
 	  If unsure, say N.
 
 config KVM_BOOK3S_64_PR
-	def_bool y
+	bool "KVM support without using hypervisor mode in host"
 	depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV
 	select KVM_BOOK3S_PR_POSSIBLE
+	---help---
+	  Support running guest kernels in virtual machines on processors
+	  without using hypervisor mode in the host, by running the
+	  guest in user mode (problem state) and emulating all
+	  privileged instructions and registers.
+
+	  This is not as fast as using hypervisor mode, but works on
+	  machines where hypervisor mode is not available or not usable,
+	  and can emulate processors that are different from the host
+	  processor, including emulating 32-bit processors on a 64-bit
+	  host.
 
 config KVM_BOOKE_HV
 	bool
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 104e8dcf4336..fa17b337bb96 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -57,7 +57,6 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \
 	book3s_64_vio_hv.o
 
 kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
-	$(KVM)/coalesced_mmio.o \
 	fpu.o \
 	book3s_paired_singles.o \
 	book3s_pr.o \
@@ -69,10 +68,15 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
 	book3s_64_mmu.o \
 	book3s_32_mmu.o
 
-kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) += \
+ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+kvm-book3s_64-module-objs := \
+	$(KVM)/coalesced_mmio.o
+
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
 	book3s_rmhandlers.o
+endif
 
-kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
+kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV)  += \
 	book3s_hv.o \
 	book3s_hv_interrupts.o \
 	book3s_64_mmu_hv.o
@@ -91,7 +95,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) += \
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
 	book3s_xics.o
 
-kvm-book3s_64-module-objs := \
+kvm-book3s_64-module-objs += \
 	$(KVM)/kvm_main.o \
 	$(KVM)/eventfd.o \
 	powerpc.o \
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index 7057a02f0906..852989a9bad3 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -20,9 +20,10 @@
 #include <linux/export.h>
 #include <asm/kvm_book3s.h>
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline);
-#else
+#endif
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline);
 EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
 #ifdef CONFIG_ALTIVEC
-- 
cgit v1.2.3


From 3a167beac07cba597856c12b87638a06b0d53db7 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 22:17:53 +0530
Subject: kvm: powerpc: Add kvmppc_ops callback

This patch adds a new callback structure, kvmppc_ops. This will help us in
enabling both HV and PR KVM together in the same kernel. The actual change to
enable them together is done in a later patch in the series.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
[agraf: squash in booke changes]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s.h |   1 -
 arch/powerpc/include/asm/kvm_ppc.h    |  89 ++++++++++----
 arch/powerpc/kernel/exceptions-64s.S  |   2 +-
 arch/powerpc/kvm/44x.c                |  55 ++++++---
 arch/powerpc/kvm/44x_emulate.c        |   8 +-
 arch/powerpc/kvm/44x_tlb.c            |   2 +-
 arch/powerpc/kvm/book3s.c             | 145 +++++++++++++++++++++-
 arch/powerpc/kvm/book3s.h             |  32 +++++
 arch/powerpc/kvm/book3s_32_mmu_host.c |   2 +-
 arch/powerpc/kvm/book3s_64_mmu_host.c |   2 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c   |  17 ++-
 arch/powerpc/kvm/book3s_emulate.c     |   8 +-
 arch/powerpc/kvm/book3s_hv.c          | 220 ++++++++++++++++++++++++----------
 arch/powerpc/kvm/book3s_interrupts.S  |   2 +-
 arch/powerpc/kvm/book3s_pr.c          | 194 +++++++++++++++++++-----------
 arch/powerpc/kvm/book3s_xics.c        |   4 +-
 arch/powerpc/kvm/booke.c              |  47 +++++++-
 arch/powerpc/kvm/booke.h              |  24 ++++
 arch/powerpc/kvm/e500.c               |  53 +++++---
 arch/powerpc/kvm/e500_emulate.c       |   8 +-
 arch/powerpc/kvm/e500_mmu.c           |   2 +-
 arch/powerpc/kvm/e500mc.c             |  54 ++++++---
 arch/powerpc/kvm/emulate.c            |   6 +-
 arch/powerpc/kvm/powerpc.c            |  58 +++------
 24 files changed, 748 insertions(+), 287 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s.h

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 99ef8711e906..315a5d692417 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -124,7 +124,6 @@ extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong ea, ulong ea_mask)
 extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask);
 extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end);
 extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr);
-extern void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr);
 extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 1823f38906c6..326033c99385 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -106,13 +106,6 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                        struct kvm_interrupt *irq);
 extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
-
-extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                                  unsigned int op, int *advance);
-extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn,
-				     ulong val);
-extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn,
-				     ulong *val);
 extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_booke_init(void);
@@ -135,8 +128,6 @@ extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 				struct kvm_create_spapr_tce *args);
 extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			     unsigned long ioba, unsigned long tce);
-extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
-				struct kvm_allocate_rma *rma);
 extern struct kvm_rma_info *kvm_alloc_rma(void);
 extern void kvm_release_rma(struct kvm_rma_info *ri);
 extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
@@ -177,6 +168,66 @@ extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
 extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
 extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
 
+union kvmppc_one_reg {
+	u32	wval;
+	u64	dval;
+	vector128 vval;
+	u64	vsxval[2];
+	struct {
+		u64	addr;
+		u64	length;
+	}	vpaval;
+};
+
+struct kvmppc_ops {
+	int (*get_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+	int (*set_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+	int (*get_one_reg)(struct kvm_vcpu *vcpu, u64 id,
+			   union kvmppc_one_reg *val);
+	int (*set_one_reg)(struct kvm_vcpu *vcpu, u64 id,
+			   union kvmppc_one_reg *val);
+	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
+	void (*vcpu_put)(struct kvm_vcpu *vcpu);
+	void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr);
+	int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+	struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id);
+	void (*vcpu_free)(struct kvm_vcpu *vcpu);
+	int (*check_requests)(struct kvm_vcpu *vcpu);
+	int (*get_dirty_log)(struct kvm *kvm, struct kvm_dirty_log *log);
+	void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot);
+	int (*prepare_memory_region)(struct kvm *kvm,
+				     struct kvm_memory_slot *memslot,
+				     struct kvm_userspace_memory_region *mem);
+	void (*commit_memory_region)(struct kvm *kvm,
+				     struct kvm_userspace_memory_region *mem,
+				     const struct kvm_memory_slot *old);
+	int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
+	int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
+			   unsigned long end);
+	int (*age_hva)(struct kvm *kvm, unsigned long hva);
+	int (*test_age_hva)(struct kvm *kvm, unsigned long hva);
+	void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte);
+	void (*mmu_destroy)(struct kvm_vcpu *vcpu);
+	void (*free_memslot)(struct kvm_memory_slot *free,
+			     struct kvm_memory_slot *dont);
+	int (*create_memslot)(struct kvm_memory_slot *slot,
+			      unsigned long npages);
+	int (*init_vm)(struct kvm *kvm);
+	void (*destroy_vm)(struct kvm *kvm);
+	int (*check_processor_compat)(void);
+	int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info);
+	int (*emulate_op)(struct kvm_run *run, struct kvm_vcpu *vcpu,
+			  unsigned int inst, int *advance);
+	int (*emulate_mtspr)(struct kvm_vcpu *vcpu, int sprn, ulong spr_val);
+	int (*emulate_mfspr)(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val);
+	void (*fast_vcpu_kick)(struct kvm_vcpu *vcpu);
+	long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
+			      unsigned long arg);
+
+};
+
+extern struct kvmppc_ops *kvmppc_ops;
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
@@ -210,17 +261,6 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value)
 	return r;
 }
 
-union kvmppc_one_reg {
-	u32	wval;
-	u64	dval;
-	vector128 vval;
-	u64	vsxval[2];
-	struct {
-		u64	addr;
-		u64	length;
-	}	vpaval;
-};
-
 #define one_reg_size(id)	\
 	(1ul << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
 
@@ -245,10 +285,10 @@ union kvmppc_one_reg {
 	__v;					\
 })
 
-void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 
-void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 
 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
@@ -281,7 +321,10 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
 	paca[cpu].kvm_hstate.host_ipi = host_ipi;
 }
 
-extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
+static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	kvmppc_ops->fast_vcpu_kick(vcpu);
+}
 
 #else
 static inline void __init kvm_cma_reserve(void)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 3d1c42b03c89..9f905e40922e 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -126,7 +126,7 @@ BEGIN_FTR_SECTION
 	bgt	cr1,.
 	GET_PACA(r13)
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	li	r0,KVM_HWTHREAD_IN_KERNEL
 	stb	r0,HSTATE_HWTHREAD_STATE(r13)
 	/* Order setting hwthread_state vs. testing hwthread_req */
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 2f5c6b6d6877..a765bcd74fbb 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -31,13 +31,13 @@
 #include "44x_tlb.h"
 #include "booke.h"
 
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_44x(struct kvm_vcpu *vcpu, int cpu)
 {
 	kvmppc_booke_vcpu_load(vcpu, cpu);
 	kvmppc_44x_tlb_load(vcpu);
 }
 
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_44x(struct kvm_vcpu *vcpu)
 {
 	kvmppc_44x_tlb_put(vcpu);
 	kvmppc_booke_vcpu_put(vcpu);
@@ -114,29 +114,32 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_get_sregs_44x(struct kvm_vcpu *vcpu,
+				      struct kvm_sregs *sregs)
 {
-	kvmppc_get_sregs_ivor(vcpu, sregs);
+	return kvmppc_get_sregs_ivor(vcpu, sregs);
 }
 
-int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_set_sregs_44x(struct kvm_vcpu *vcpu,
+				     struct kvm_sregs *sregs)
 {
 	return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
-int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
-			union kvmppc_one_reg *val)
+static int kvmppc_get_one_reg_44x(struct kvm_vcpu *vcpu, u64 id,
+				  union kvmppc_one_reg *val)
 {
 	return -EINVAL;
 }
 
-int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
-		       union kvmppc_one_reg *val)
+static int kvmppc_set_one_reg_44x(struct kvm_vcpu *vcpu, u64 id,
+				  union kvmppc_one_reg *val)
 {
 	return -EINVAL;
 }
 
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static struct kvm_vcpu *kvmppc_core_vcpu_create_44x(struct kvm *kvm,
+						    unsigned int id)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x;
 	struct kvm_vcpu *vcpu;
@@ -167,7 +170,7 @@ out:
 	return ERR_PTR(err);
 }
 
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_44x(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 
@@ -176,24 +179,46 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 	kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
 }
 
-int kvmppc_core_init_vm(struct kvm *kvm)
+static int kvmppc_core_init_vm_44x(struct kvm *kvm)
 {
 	return 0;
 }
 
-void kvmppc_core_destroy_vm(struct kvm *kvm)
+static void kvmppc_core_destroy_vm_44x(struct kvm *kvm)
 {
 }
 
+static struct kvmppc_ops kvm_ops_44x = {
+	.get_sregs = kvmppc_core_get_sregs_44x,
+	.set_sregs = kvmppc_core_set_sregs_44x,
+	.get_one_reg = kvmppc_get_one_reg_44x,
+	.set_one_reg = kvmppc_set_one_reg_44x,
+	.vcpu_load   = kvmppc_core_vcpu_load_44x,
+	.vcpu_put    = kvmppc_core_vcpu_put_44x,
+	.vcpu_create = kvmppc_core_vcpu_create_44x,
+	.vcpu_free   = kvmppc_core_vcpu_free_44x,
+	.mmu_destroy  = kvmppc_mmu_destroy_44x,
+	.init_vm = kvmppc_core_init_vm_44x,
+	.destroy_vm = kvmppc_core_destroy_vm_44x,
+	.emulate_op = kvmppc_core_emulate_op_44x,
+	.emulate_mtspr = kvmppc_core_emulate_mtspr_44x,
+	.emulate_mfspr = kvmppc_core_emulate_mfspr_44x,
+};
+
 static int __init kvmppc_44x_init(void)
 {
 	int r;
 
 	r = kvmppc_booke_init();
 	if (r)
-		return r;
+		goto err_out;
 
-	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE);
+	r = kvm_init(&kvm_ops_44x, sizeof(struct kvmppc_vcpu_44x),
+		     0, THIS_MODULE);
+	if (r)
+		goto err_out;
+err_out:
+	return r;
 }
 
 static void __exit kvmppc_44x_exit(void)
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 35ec0a8547da..92c9ab4bcfec 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -91,8 +91,8 @@ static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn)
 	return EMULATE_DONE;
 }
 
-int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                           unsigned int inst, int *advance)
+int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu,
+			       unsigned int inst, int *advance)
 {
 	int emulated = EMULATE_DONE;
 	int dcrn = get_dcrn(inst);
@@ -152,7 +152,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	return emulated;
 }
 
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
+int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 {
 	int emulated = EMULATE_DONE;
 
@@ -172,7 +172,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 	return emulated;
 }
 
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
+int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 {
 	int emulated = EMULATE_DONE;
 
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ed0385448148..0deef1082e02 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -268,7 +268,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
 	trace_kvm_stlb_inval(stlb_index);
 }
 
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	int i;
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 807103ad2628..784a1d5ccb04 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -422,6 +422,18 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 }
 
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	return kvmppc_ops->get_sregs(vcpu, sregs);
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	return kvmppc_ops->set_sregs(vcpu, sregs);
+}
+
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	int i;
@@ -498,8 +510,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 	if (size > sizeof(val))
 		return -EINVAL;
 
-	r = kvmppc_get_one_reg(vcpu, reg->id, &val);
-
+	r = kvmppc_ops->get_one_reg(vcpu, reg->id, &val);
 	if (r == -EINVAL) {
 		r = 0;
 		switch (reg->id) {
@@ -578,8 +589,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 	if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
 		return -EFAULT;
 
-	r = kvmppc_set_one_reg(vcpu, reg->id, &val);
-
+	r = kvmppc_ops->set_one_reg(vcpu, reg->id, &val);
 	if (r == -EINVAL) {
 		r = 0;
 		switch (reg->id) {
@@ -638,6 +648,26 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 	return r;
 }
 
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	kvmppc_ops->vcpu_load(vcpu, cpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	kvmppc_ops->vcpu_put(vcpu);
+}
+
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+{
+	kvmppc_ops->set_msr(vcpu, msr);
+}
+
+int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+	return kvmppc_ops->vcpu_run(kvm_run, vcpu);
+}
+
 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                   struct kvm_translation *tr)
 {
@@ -657,3 +687,110 @@ void kvmppc_decrementer_func(unsigned long data)
 	kvmppc_core_queue_dec(vcpu);
 	kvm_vcpu_kick(vcpu);
 }
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+	return kvmppc_ops->vcpu_create(kvm, id);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+	kvmppc_ops->vcpu_free(vcpu);
+}
+
+int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
+{
+	return kvmppc_ops->check_requests(vcpu);
+}
+
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+	return kvmppc_ops->get_dirty_log(kvm, log);
+}
+
+void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+			      struct kvm_memory_slot *dont)
+{
+	kvmppc_ops->free_memslot(free, dont);
+}
+
+int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+			       unsigned long npages)
+{
+	return kvmppc_ops->create_memslot(slot, npages);
+}
+
+void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+	kvmppc_ops->flush_memslot(kvm, memslot);
+}
+
+int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+				struct kvm_memory_slot *memslot,
+				struct kvm_userspace_memory_region *mem)
+{
+	return kvmppc_ops->prepare_memory_region(kvm, memslot, mem);
+}
+
+void kvmppc_core_commit_memory_region(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem,
+				const struct kvm_memory_slot *old)
+{
+	kvmppc_ops->commit_memory_region(kvm, mem, old);
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	return kvmppc_ops->unmap_hva(kvm, hva);
+}
+
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+	return kvmppc_ops->unmap_hva_range(kvm, start, end);
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return kvmppc_ops->age_hva(kvm, hva);
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return kvmppc_ops->test_age_hva(kvm, hva);
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+	kvmppc_ops->set_spte_hva(kvm, hva, pte);
+}
+
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+	kvmppc_ops->mmu_destroy(vcpu);
+}
+
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+
+#ifdef CONFIG_PPC64
+	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
+#endif
+
+	return kvmppc_ops->init_vm(kvm);
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+	kvmppc_ops->destroy_vm(kvm);
+
+#ifdef CONFIG_PPC64
+	kvmppc_rtas_tokens_free(kvm);
+	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
+#endif
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+	return kvmppc_ops->check_processor_compat();
+}
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
new file mode 100644
index 000000000000..9e5b3a341943
--- /dev/null
+++ b/arch/powerpc/kvm/book3s.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your option) any later version of the license.
+ *
+ */
+
+#ifndef __POWERPC_KVM_BOOK3S_H__
+#define __POWERPC_KVM_BOOK3S_H__
+
+extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
+					 struct kvm_memory_slot *memslot);
+extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
+extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
+				  unsigned long end);
+extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva);
+extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva);
+extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte);
+
+extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+				     unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu,
+					int sprn, ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu,
+					int sprn, ulong *spr_val);
+
+#endif
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 673322329238..3a0abd2e5a15 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -349,7 +349,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
 	svcpu_put(svcpu);
 }
 
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
 {
 	int i;
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index e2ab8a747fbe..819672c9d787 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -378,7 +378,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
 	svcpu_put(svcpu);
 }
 
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
 {
 	kvmppc_mmu_hpte_destroy(vcpu);
 	__destroy_context(to_book3s(vcpu)->context_id[0]);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 394fef820f0c..f3ff587a8b7d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -260,10 +260,6 @@ int kvmppc_mmu_hv_init(void)
 	return 0;
 }
 
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
-{
-}
-
 static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
 {
 	kvmppc_set_msr(vcpu, MSR_SF | MSR_ME);
@@ -906,21 +902,22 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	return 0;
 }
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
 {
 	if (kvm->arch.using_mmu_notifiers)
 		kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
 	return 0;
 }
 
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	if (kvm->arch.using_mmu_notifiers)
 		kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
 	return 0;
 }
 
-void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
+				  struct kvm_memory_slot *memslot)
 {
 	unsigned long *rmapp;
 	unsigned long gfn;
@@ -994,7 +991,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	return ret;
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva)
 {
 	if (!kvm->arch.using_mmu_notifiers)
 		return 0;
@@ -1032,14 +1029,14 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	return ret;
 }
 
-int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
 {
 	if (!kvm->arch.using_mmu_notifiers)
 		return 0;
 	return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
 }
 
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
 	if (!kvm->arch.using_mmu_notifiers)
 		return;
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 34044b111daa..b9841ad844fb 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -86,8 +86,8 @@ static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)
 	return true;
 }
 
-int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                           unsigned int inst, int *advance)
+int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+			      unsigned int inst, int *advance)
 {
 	int emulated = EMULATE_DONE;
 	int rt = get_rt(inst);
@@ -345,7 +345,7 @@ static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn)
 	return bat;
 }
 
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
+int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 {
 	int emulated = EMULATE_DONE;
 
@@ -468,7 +468,7 @@ unprivileged:
 	return emulated;
 }
 
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
+int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 {
 	int emulated = EMULATE_DONE;
 
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index b4d5b3b06769..3d7079a08d2a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -53,6 +53,8 @@
 #include <linux/highmem.h>
 #include <linux/hugetlb.h>
 
+#include "book3s.h"
+
 /* #define EXIT_DEBUG */
 /* #define EXIT_DEBUG_SIMPLE */
 /* #define EXIT_DEBUG_INT */
@@ -66,7 +68,7 @@
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
-void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 {
 	int me;
 	int cpu = vcpu->cpu;
@@ -125,7 +127,7 @@ void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
  * purely defensive; they should never fail.)
  */
 
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
@@ -143,7 +145,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	spin_unlock(&vcpu->arch.tbacct_lock);
 }
 
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
@@ -155,13 +157,13 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 	spin_unlock(&vcpu->arch.tbacct_lock);
 }
 
-void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
 {
 	vcpu->arch.shregs.msr = msr;
 	kvmppc_end_cede(vcpu);
 }
 
-void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
+void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
 {
 	vcpu->arch.pvr = pvr;
 }
@@ -614,8 +616,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	return RESUME_GUEST;
 }
 
-static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
-			      struct task_struct *tsk)
+static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+				 struct task_struct *tsk)
 {
 	int r = RESUME_HOST;
 
@@ -717,8 +719,8 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	return r;
 }
 
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-				  struct kvm_sregs *sregs)
+static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu,
+					    struct kvm_sregs *sregs)
 {
 	int i;
 
@@ -732,12 +734,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-				  struct kvm_sregs *sregs)
+static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
+					    struct kvm_sregs *sregs)
 {
 	int i, j;
 
-	kvmppc_set_pvr(vcpu, sregs->pvr);
+	kvmppc_set_pvr_hv(vcpu, sregs->pvr);
 
 	j = 0;
 	for (i = 0; i < vcpu->arch.slb_nr; i++) {
@@ -767,7 +769,8 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr)
 	spin_unlock(&vc->lock);
 }
 
-int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
+static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
+				 union kvmppc_one_reg *val)
 {
 	int r = 0;
 	long int i;
@@ -866,7 +869,8 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 	return r;
 }
 
-int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
+static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
+				 union kvmppc_one_reg *val)
 {
 	int r = 0;
 	long int i;
@@ -979,14 +983,8 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 	return r;
 }
 
-int kvmppc_core_check_processor_compat(void)
-{
-	if (cpu_has_feature(CPU_FTR_HVMODE))
-		return 0;
-	return -EIO;
-}
-
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
+						   unsigned int id)
 {
 	struct kvm_vcpu *vcpu;
 	int err = -EINVAL;
@@ -1010,8 +1008,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	vcpu->arch.mmcr[0] = MMCR0_FC;
 	vcpu->arch.ctrl = CTRL_RUNLATCH;
 	/* default to host PVR, since we can't spoof it */
-	vcpu->arch.pvr = mfspr(SPRN_PVR);
-	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+	kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
 	spin_lock_init(&vcpu->arch.vpa_update_lock);
 	spin_lock_init(&vcpu->arch.tbacct_lock);
 	vcpu->arch.busy_preempt = TB_NIL;
@@ -1064,7 +1061,7 @@ static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
 					vpa->dirty);
 }
 
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)
 {
 	spin_lock(&vcpu->arch.vpa_update_lock);
 	unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
@@ -1075,6 +1072,12 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
+static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
+{
+	/* Indicate we want to get back into the guest */
+	return 1;
+}
+
 static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
 {
 	unsigned long dec_nsec, now;
@@ -1356,8 +1359,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 		ret = RESUME_GUEST;
 		if (vcpu->arch.trap)
-			ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
-						 vcpu->arch.run_task);
+			ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
+						    vcpu->arch.run_task);
 
 		vcpu->arch.ret = ret;
 		vcpu->arch.trap = 0;
@@ -1516,7 +1519,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	return vcpu->arch.ret;
 }
 
-int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
 	int r;
 	int srcu_idx;
@@ -1638,7 +1641,8 @@ static const struct file_operations kvm_rma_fops = {
 	.release	= kvm_rma_release,
 };
 
-long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
+static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
+				      struct kvm_allocate_rma *ret)
 {
 	long fd;
 	struct kvm_rma_info *ri;
@@ -1684,7 +1688,8 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
 	(*sps)++;
 }
 
-int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
+static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
+					 struct kvm_ppc_smmu_info *info)
 {
 	struct kvm_ppc_one_seg_page_size *sps;
 
@@ -1705,7 +1710,8 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
+					 struct kvm_dirty_log *log)
 {
 	struct kvm_memory_slot *memslot;
 	int r;
@@ -1759,8 +1765,8 @@ static void unpin_slot(struct kvm_memory_slot *memslot)
 	}
 }
 
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
-			      struct kvm_memory_slot *dont)
+static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
+					struct kvm_memory_slot *dont)
 {
 	if (!dont || free->arch.rmap != dont->arch.rmap) {
 		vfree(free->arch.rmap);
@@ -1773,8 +1779,8 @@ void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
 	}
 }
 
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
-			       unsigned long npages)
+static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
+					 unsigned long npages)
 {
 	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
 	if (!slot->arch.rmap)
@@ -1784,9 +1790,9 @@ int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
 	return 0;
 }
 
-int kvmppc_core_prepare_memory_region(struct kvm *kvm,
-				      struct kvm_memory_slot *memslot,
-				      struct kvm_userspace_memory_region *mem)
+static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
+					struct kvm_memory_slot *memslot,
+					struct kvm_userspace_memory_region *mem)
 {
 	unsigned long *phys;
 
@@ -1802,9 +1808,9 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 	return 0;
 }
 
-void kvmppc_core_commit_memory_region(struct kvm *kvm,
-				      struct kvm_userspace_memory_region *mem,
-				      const struct kvm_memory_slot *old)
+static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem,
+				const struct kvm_memory_slot *old)
 {
 	unsigned long npages = mem->memory_size >> PAGE_SHIFT;
 	struct kvm_memory_slot *memslot;
@@ -1847,6 +1853,11 @@ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
 	}
 }
 
+static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
+{
+	return;
+}
+
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 {
 	int err = 0;
@@ -1994,7 +2005,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 	goto out_srcu;
 }
 
-int kvmppc_core_init_vm(struct kvm *kvm)
+static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 {
 	unsigned long lpcr, lpid;
 
@@ -2012,9 +2023,6 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 	 */
 	cpumask_setall(&kvm->arch.need_tlb_flush);
 
-	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
-	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
-
 	kvm->arch.rma = NULL;
 
 	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
@@ -2059,7 +2067,7 @@ static void kvmppc_free_vcores(struct kvm *kvm)
 	kvm->arch.online_vcores = 0;
 }
 
-void kvmppc_core_destroy_vm(struct kvm *kvm)
+static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 {
 	uninhibit_secondary_onlining();
 
@@ -2069,39 +2077,127 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
 		kvm->arch.rma = NULL;
 	}
 
-	kvmppc_rtas_tokens_free(kvm);
-
 	kvmppc_free_hpt(kvm);
-	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
 }
 
-/* These are stubs for now */
-void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
+/* We don't need to emulate any privileged instructions or dcbz */
+static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+				     unsigned int inst, int *advance)
 {
+	return EMULATE_FAIL;
 }
 
-/* We don't need to emulate any privileged instructions or dcbz */
-int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                           unsigned int inst, int *advance)
+static int kvmppc_core_emulate_mtspr_hv(struct kvm_vcpu *vcpu, int sprn,
+					ulong spr_val)
 {
 	return EMULATE_FAIL;
 }
 
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
+static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
+					ulong *spr_val)
 {
 	return EMULATE_FAIL;
 }
 
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
+static int kvmppc_core_check_processor_compat_hv(void)
 {
-	return EMULATE_FAIL;
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return -EIO;
+	return 0;
 }
 
-static int kvmppc_book3s_hv_init(void)
+static long kvm_arch_vm_ioctl_hv(struct file *filp,
+				 unsigned int ioctl, unsigned long arg)
+{
+	struct kvm *kvm __maybe_unused = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	long r;
+
+	switch (ioctl) {
+
+	case KVM_ALLOCATE_RMA: {
+		struct kvm_allocate_rma rma;
+		struct kvm *kvm = filp->private_data;
+
+		r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
+		if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
+			r = -EFAULT;
+		break;
+	}
+
+	case KVM_PPC_ALLOCATE_HTAB: {
+		u32 htab_order;
+
+		r = -EFAULT;
+		if (get_user(htab_order, (u32 __user *)argp))
+			break;
+		r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
+		if (r)
+			break;
+		r = -EFAULT;
+		if (put_user(htab_order, (u32 __user *)argp))
+			break;
+		r = 0;
+		break;
+	}
+
+	case KVM_PPC_GET_HTAB_FD: {
+		struct kvm_get_htab_fd ghf;
+
+		r = -EFAULT;
+		if (copy_from_user(&ghf, argp, sizeof(ghf)))
+			break;
+		r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
+		break;
+	}
+
+	default:
+		r = -ENOTTY;
+	}
+
+	return r;
+}
+
+static struct kvmppc_ops kvmppc_hv_ops = {
+	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
+	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
+	.get_one_reg = kvmppc_get_one_reg_hv,
+	.set_one_reg = kvmppc_set_one_reg_hv,
+	.vcpu_load   = kvmppc_core_vcpu_load_hv,
+	.vcpu_put    = kvmppc_core_vcpu_put_hv,
+	.set_msr     = kvmppc_set_msr_hv,
+	.vcpu_run    = kvmppc_vcpu_run_hv,
+	.vcpu_create = kvmppc_core_vcpu_create_hv,
+	.vcpu_free   = kvmppc_core_vcpu_free_hv,
+	.check_requests = kvmppc_core_check_requests_hv,
+	.get_dirty_log  = kvm_vm_ioctl_get_dirty_log_hv,
+	.flush_memslot  = kvmppc_core_flush_memslot_hv,
+	.prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
+	.commit_memory_region  = kvmppc_core_commit_memory_region_hv,
+	.unmap_hva = kvm_unmap_hva_hv,
+	.unmap_hva_range = kvm_unmap_hva_range_hv,
+	.age_hva  = kvm_age_hva_hv,
+	.test_age_hva = kvm_test_age_hva_hv,
+	.set_spte_hva = kvm_set_spte_hva_hv,
+	.mmu_destroy  = kvmppc_mmu_destroy_hv,
+	.free_memslot = kvmppc_core_free_memslot_hv,
+	.create_memslot = kvmppc_core_create_memslot_hv,
+	.init_vm =  kvmppc_core_init_vm_hv,
+	.destroy_vm = kvmppc_core_destroy_vm_hv,
+	.check_processor_compat = kvmppc_core_check_processor_compat_hv,
+	.get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv,
+	.emulate_op = kvmppc_core_emulate_op_hv,
+	.emulate_mtspr = kvmppc_core_emulate_mtspr_hv,
+	.emulate_mfspr = kvmppc_core_emulate_mfspr_hv,
+	.fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
+	.arch_vm_ioctl  = kvm_arch_vm_ioctl_hv,
+};
+
+static int kvmppc_book3s_init_hv(void)
 {
 	int r;
 
-	r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+	r = kvm_init(&kvmppc_hv_ops, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
 
 	if (r)
 		return r;
@@ -2111,10 +2207,10 @@ static int kvmppc_book3s_hv_init(void)
 	return r;
 }
 
-static void kvmppc_book3s_hv_exit(void)
+static void kvmppc_book3s_exit_hv(void)
 {
 	kvm_exit();
 }
 
-module_init(kvmppc_book3s_hv_init);
-module_exit(kvmppc_book3s_hv_exit);
+module_init(kvmppc_book3s_init_hv);
+module_exit(kvmppc_book3s_exit_hv);
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 38166ab2f19c..f4dd041c14ea 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -181,7 +181,7 @@ kvm_start_lightweight:
 
 	/* Restore r3 (kvm_run) and r4 (vcpu) */
 	REST_2GPRS(3, r1)
-	bl	FUNC(kvmppc_handle_exit)
+	bl	FUNC(kvmppc_handle_exit_pr)
 
 	/* If RESUME_GUEST, get back in the loop */
 	cmpwi	r3, RESUME_GUEST
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 6075dbd0b364..05d28cf4a06c 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -41,6 +41,7 @@
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 
+#include "book3s.h"
 #include "trace.h"
 
 /* #define EXIT_DEBUG */
@@ -56,7 +57,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 #define HW_PAGE_SIZE PAGE_SIZE
 #endif
 
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
@@ -70,7 +71,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 #endif
 }
 
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
@@ -137,7 +138,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
 	vcpu->arch.last_inst   = svcpu->last_inst;
 }
 
-int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
+static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
 {
 	int r = 1; /* Indicate we want to get back into the guest */
 
@@ -180,7 +181,7 @@ static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start,
 	}
 }
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
 {
 	trace_kvm_unmap_hva(hva);
 
@@ -189,26 +190,27 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 	return 0;
 }
 
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
+				  unsigned long end)
 {
 	do_kvm_unmap_hva(kvm, start, end);
 
 	return 0;
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva)
 {
 	/* XXX could be more clever ;) */
 	return 0;
 }
 
-int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+static int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva)
 {
 	/* XXX could be more clever ;) */
 	return 0;
 }
 
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
 	/* The page will get remapped properly on its next fault */
 	do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
@@ -233,7 +235,7 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
 	vcpu->arch.shadow_msr = smsr;
 }
 
-void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 {
 	ulong old_msr = vcpu->arch.shared->msr;
 
@@ -293,7 +295,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
 		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
 }
 
-void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
+void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
 {
 	u32 host_pvr;
 
@@ -725,8 +727,8 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
 	current->thread.regs->msr |= lost_ext;
 }
 
-int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                       unsigned int exit_nr)
+int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+			  unsigned int exit_nr)
 {
 	int r = RESUME_HOST;
 	int s;
@@ -1034,8 +1036,8 @@ program_interrupt:
 	return r;
 }
 
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-                                  struct kvm_sregs *sregs)
+static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu,
+					    struct kvm_sregs *sregs)
 {
 	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
 	int i;
@@ -1061,13 +1063,13 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-                                  struct kvm_sregs *sregs)
+static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu,
+					    struct kvm_sregs *sregs)
 {
 	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
 	int i;
 
-	kvmppc_set_pvr(vcpu, sregs->pvr);
+	kvmppc_set_pvr_pr(vcpu, sregs->pvr);
 
 	vcpu3s->sdr1 = sregs->u.s.sdr1;
 	if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
@@ -1097,7 +1099,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
+static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
+				 union kvmppc_one_reg *val)
 {
 	int r = 0;
 
@@ -1126,7 +1129,8 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 	return r;
 }
 
-int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
+static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
+				 union kvmppc_one_reg *val)
 {
 	int r = 0;
 
@@ -1156,12 +1160,8 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 	return r;
 }
 
-int kvmppc_core_check_processor_compat(void)
-{
-	return 0;
-}
-
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
+						   unsigned int id)
 {
 	struct kvmppc_vcpu_book3s *vcpu_book3s;
 	struct kvm_vcpu *vcpu;
@@ -1208,7 +1208,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	/* default to book3s_32 (750) */
 	vcpu->arch.pvr = 0x84202;
 #endif
-	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+	kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
 	vcpu->arch.slb_nr = 64;
 
 	vcpu->arch.shadow_msr = MSR_USER64;
@@ -1233,7 +1233,7 @@ out:
 	return ERR_PTR(err);
 }
 
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
 
@@ -1246,7 +1246,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
-int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	int ret;
 	double fpr[32][TS_FPRWIDTH];
@@ -1355,8 +1355,8 @@ out:
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
-				      struct kvm_dirty_log *log)
+static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
+					 struct kvm_dirty_log *log)
 {
 	struct kvm_memory_slot *memslot;
 	struct kvm_vcpu *vcpu;
@@ -1391,8 +1391,42 @@ out:
 	return r;
 }
 
+static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
+					 struct kvm_memory_slot *memslot)
+{
+	return;
+}
+
+static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
+					struct kvm_memory_slot *memslot,
+					struct kvm_userspace_memory_region *mem)
+{
+	return 0;
+}
+
+static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem,
+				const struct kvm_memory_slot *old)
+{
+	return;
+}
+
+static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free,
+					struct kvm_memory_slot *dont)
+{
+	return;
+}
+
+static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot,
+					 unsigned long npages)
+{
+	return 0;
+}
+
+
 #ifdef CONFIG_PPC64
-int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
+static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
+					 struct kvm_ppc_smmu_info *info)
 {
 	long int i;
 	struct kvm_vcpu *vcpu;
@@ -1436,45 +1470,20 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
 
 	return 0;
 }
-#endif /* CONFIG_PPC64 */
-
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
-			      struct kvm_memory_slot *dont)
-{
-}
-
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
-			       unsigned long npages)
-{
-	return 0;
-}
-
-int kvmppc_core_prepare_memory_region(struct kvm *kvm,
-				      struct kvm_memory_slot *memslot,
-				      struct kvm_userspace_memory_region *mem)
-{
-	return 0;
-}
-
-void kvmppc_core_commit_memory_region(struct kvm *kvm,
-				struct kvm_userspace_memory_region *mem,
-				const struct kvm_memory_slot *old)
-{
-}
-
-void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+#else
+static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
+					 struct kvm_ppc_smmu_info *info)
 {
+	/* We should not get called */
+	BUG();
 }
+#endif /* CONFIG_PPC64 */
 
 static unsigned int kvm_global_user_count = 0;
 static DEFINE_SPINLOCK(kvm_global_user_count_lock);
 
-int kvmppc_core_init_vm(struct kvm *kvm)
+static int kvmppc_core_init_vm_pr(struct kvm *kvm)
 {
-#ifdef CONFIG_PPC64
-	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
-	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
-#endif
 	mutex_init(&kvm->arch.hpt_mutex);
 
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
@@ -1486,7 +1495,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 	return 0;
 }
 
-void kvmppc_core_destroy_vm(struct kvm *kvm)
+static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)
 {
 #ifdef CONFIG_PPC64
 	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
@@ -1501,11 +1510,58 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
 	}
 }
 
-static int kvmppc_book3s_init(void)
+static int kvmppc_core_check_processor_compat_pr(void)
+{
+	/* we are always compatible */
+	return 0;
+}
+
+static long kvm_arch_vm_ioctl_pr(struct file *filp,
+				 unsigned int ioctl, unsigned long arg)
+{
+	return -ENOTTY;
+}
+
+static struct kvmppc_ops kvmppc_pr_ops = {
+	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
+	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
+	.get_one_reg = kvmppc_get_one_reg_pr,
+	.set_one_reg = kvmppc_set_one_reg_pr,
+	.vcpu_load   = kvmppc_core_vcpu_load_pr,
+	.vcpu_put    = kvmppc_core_vcpu_put_pr,
+	.set_msr     = kvmppc_set_msr_pr,
+	.vcpu_run    = kvmppc_vcpu_run_pr,
+	.vcpu_create = kvmppc_core_vcpu_create_pr,
+	.vcpu_free   = kvmppc_core_vcpu_free_pr,
+	.check_requests = kvmppc_core_check_requests_pr,
+	.get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr,
+	.flush_memslot = kvmppc_core_flush_memslot_pr,
+	.prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
+	.commit_memory_region = kvmppc_core_commit_memory_region_pr,
+	.unmap_hva = kvm_unmap_hva_pr,
+	.unmap_hva_range = kvm_unmap_hva_range_pr,
+	.age_hva  = kvm_age_hva_pr,
+	.test_age_hva = kvm_test_age_hva_pr,
+	.set_spte_hva = kvm_set_spte_hva_pr,
+	.mmu_destroy  = kvmppc_mmu_destroy_pr,
+	.free_memslot = kvmppc_core_free_memslot_pr,
+	.create_memslot = kvmppc_core_create_memslot_pr,
+	.init_vm = kvmppc_core_init_vm_pr,
+	.destroy_vm = kvmppc_core_destroy_vm_pr,
+	.check_processor_compat = kvmppc_core_check_processor_compat_pr,
+	.get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
+	.emulate_op = kvmppc_core_emulate_op_pr,
+	.emulate_mtspr = kvmppc_core_emulate_mtspr_pr,
+	.emulate_mfspr = kvmppc_core_emulate_mfspr_pr,
+	.fast_vcpu_kick = kvm_vcpu_kick,
+	.arch_vm_ioctl  = kvm_arch_vm_ioctl_pr,
+};
+
+static int kvmppc_book3s_init_pr(void)
 {
 	int r;
 
-	r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+	r = kvm_init(&kvmppc_pr_ops, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
 
 	if (r)
 		return r;
@@ -1515,11 +1571,11 @@ static int kvmppc_book3s_init(void)
 	return r;
 }
 
-static void kvmppc_book3s_exit(void)
+static void kvmppc_book3s_exit_pr(void)
 {
 	kvmppc_mmu_hpte_sysexit();
 	kvm_exit();
 }
 
-module_init(kvmppc_book3s_init);
-module_exit(kvmppc_book3s_exit);
+module_init(kvmppc_book3s_init_pr);
+module_exit(kvmppc_book3s_exit_pr);
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index a3a5cb8ee7ea..51ed1ea440e0 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -1250,13 +1250,13 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
 
 	xics_debugfs_init(xics);
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
 		/* Enable real mode support */
 		xics->real_mode = ENABLE_REALMODE;
 		xics->real_mode_dbg = DEBUG_REALMODE;
 	}
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 	return 0;
 }
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 8b6a790c0562..e5f8ba793c31 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1415,7 +1415,7 @@ static int set_sregs_arch206(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
 	sregs->u.e.features |= KVM_SREGS_E_IVOR;
 
@@ -1435,6 +1435,7 @@ void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
 	sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
 	sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+	return 0;
 }
 
 int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
@@ -1469,8 +1470,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 
 	get_sregs_base(vcpu, sregs);
 	get_sregs_arch206(vcpu, sregs);
-	kvmppc_core_get_sregs(vcpu, sregs);
-	return 0;
+	return kvmppc_ops->get_sregs(vcpu, sregs);
 }
 
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
@@ -1489,7 +1489,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	if (ret < 0)
 		return ret;
 
-	return kvmppc_core_set_sregs(vcpu, sregs);
+	return kvmppc_ops->set_sregs(vcpu, sregs);
 }
 
 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
@@ -1546,7 +1546,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 		val = get_reg_val(reg->id, vcpu->arch.vrsave);
 		break;
 	default:
-		r = kvmppc_get_one_reg(vcpu, reg->id, &val);
+		r = kvmppc_ops->get_one_reg(vcpu, reg->id, &val);
 		break;
 	}
 
@@ -1629,7 +1629,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 		vcpu->arch.vrsave = set_reg_val(reg->id, val);
 		break;
 	default:
-		r = kvmppc_set_one_reg(vcpu, reg->id, &val);
+		r = kvmppc_ops->set_one_reg(vcpu, reg->id, &val);
 		break;
 	}
 
@@ -1907,6 +1907,41 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
 	kvmppc_clear_dbsr();
 }
 
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+	kvmppc_ops->mmu_destroy(vcpu);
+}
+
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+	return kvmppc_ops->init_vm(kvm);
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+	return kvmppc_ops->vcpu_create(kvm, id);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+	kvmppc_ops->vcpu_free(vcpu);
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+	kvmppc_ops->destroy_vm(kvm);
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	kvmppc_ops->vcpu_load(vcpu, cpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	kvmppc_ops->vcpu_put(vcpu);
+}
+
 int __init kvmppc_booke_init(void)
 {
 #ifndef CONFIG_KVM_BOOKE_HV
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index a1ff67d04022..09bfd9bc7cf8 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -99,6 +99,30 @@ enum int_class {
 
 void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type);
 
+extern void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu,
+				      unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn,
+					 ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn,
+					 ulong *spr_val);
+extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
+				       struct kvm_vcpu *vcpu,
+				       unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
+					  ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
+					  ulong *spr_val);
+extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
+				       struct kvm_vcpu *vcpu,
+				       unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
+					  ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
+					  ulong *spr_val);
+
 /*
  * Load up guest vcpu FP state if it's needed.
  * It also set the MSR_FP in thread so that host know
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index ce6b73c29612..d225d5ebddcc 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -305,7 +305,7 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 {
 }
 
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu)
 {
 	kvmppc_booke_vcpu_load(vcpu, cpu);
 
@@ -313,7 +313,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
 }
 
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_e500(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_SPE
 	if (vcpu->arch.shadow_msr & MSR_SPE)
@@ -367,7 +367,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_get_sregs_e500(struct kvm_vcpu *vcpu,
+				      struct kvm_sregs *sregs)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 
@@ -388,9 +389,11 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 
 	kvmppc_get_sregs_ivor(vcpu, sregs);
 	kvmppc_get_sregs_e500_tlb(vcpu, sregs);
+	return 0;
 }
 
-int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_set_sregs_e500(struct kvm_vcpu *vcpu,
+				      struct kvm_sregs *sregs)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 	int ret;
@@ -425,21 +428,22 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
-int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
-			union kvmppc_one_reg *val)
+static int kvmppc_get_one_reg_e500(struct kvm_vcpu *vcpu, u64 id,
+				   union kvmppc_one_reg *val)
 {
 	int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
 	return r;
 }
 
-int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
-		       union kvmppc_one_reg *val)
+static int kvmppc_set_one_reg_e500(struct kvm_vcpu *vcpu, u64 id,
+				   union kvmppc_one_reg *val)
 {
 	int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
 	return r;
 }
 
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
+						     unsigned int id)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500;
 	struct kvm_vcpu *vcpu;
@@ -481,7 +485,7 @@ out:
 	return ERR_PTR(err);
 }
 
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 
@@ -492,15 +496,32 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 	kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
 }
 
-int kvmppc_core_init_vm(struct kvm *kvm)
+static int kvmppc_core_init_vm_e500(struct kvm *kvm)
 {
 	return 0;
 }
 
-void kvmppc_core_destroy_vm(struct kvm *kvm)
+static void kvmppc_core_destroy_vm_e500(struct kvm *kvm)
 {
 }
 
+static struct kvmppc_ops kvm_ops_e500 = {
+	.get_sregs = kvmppc_core_get_sregs_e500,
+	.set_sregs = kvmppc_core_set_sregs_e500,
+	.get_one_reg = kvmppc_get_one_reg_e500,
+	.set_one_reg = kvmppc_set_one_reg_e500,
+	.vcpu_load   = kvmppc_core_vcpu_load_e500,
+	.vcpu_put    = kvmppc_core_vcpu_put_e500,
+	.vcpu_create = kvmppc_core_vcpu_create_e500,
+	.vcpu_free   = kvmppc_core_vcpu_free_e500,
+	.mmu_destroy  = kvmppc_mmu_destroy_e500,
+	.init_vm = kvmppc_core_init_vm_e500,
+	.destroy_vm = kvmppc_core_destroy_vm_e500,
+	.emulate_op = kvmppc_core_emulate_op_e500,
+	.emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
+	.emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
+};
+
 static int __init kvmppc_e500_init(void)
 {
 	int r, i;
@@ -512,11 +533,11 @@ static int __init kvmppc_e500_init(void)
 
 	r = kvmppc_core_check_processor_compat();
 	if (r)
-		return r;
+		goto err_out;
 
 	r = kvmppc_booke_init();
 	if (r)
-		return r;
+		goto err_out;
 
 	/* copy extra E500 exception handlers */
 	ivor[0] = mfspr(SPRN_IVOR32);
@@ -534,7 +555,9 @@ static int __init kvmppc_e500_init(void)
 	flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers +
 			   ivor[max_ivor] + handler_len);
 
-	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+	r = kvm_init(&kvm_ops_e500, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+err_out:
+	return r;
 }
 
 static void __exit kvmppc_e500_exit(void)
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 6163a0318d2a..89b7f821f6c4 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -103,8 +103,8 @@ static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	return emulated;
 }
 
-int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                           unsigned int inst, int *advance)
+int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
+				unsigned int inst, int *advance)
 {
 	int emulated = EMULATE_DONE;
 	int ra = get_ra(inst);
@@ -172,7 +172,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	return emulated;
 }
 
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
+int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 	int emulated = EMULATE_DONE;
@@ -263,7 +263,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 	return emulated;
 }
 
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
+int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 	int emulated = EMULATE_DONE;
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 6d6f153b6c1d..d25bb75aec90 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -536,7 +536,7 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
 	return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
 }
 
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu)
 {
 }
 
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 19c8379575f7..db6a383401c7 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -110,7 +110,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
 
 static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu);
 
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 
@@ -147,7 +147,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	kvmppc_load_guest_fp(vcpu);
 }
 
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.eplc = mfspr(SPRN_EPLC);
 	vcpu->arch.epsc = mfspr(SPRN_EPSC);
@@ -204,7 +204,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_get_sregs_e500mc(struct kvm_vcpu *vcpu,
+					struct kvm_sregs *sregs)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 
@@ -224,10 +225,11 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];
 	sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
 
-	kvmppc_get_sregs_ivor(vcpu, sregs);
+	return kvmppc_get_sregs_ivor(vcpu, sregs);
 }
 
-int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu *vcpu,
+					struct kvm_sregs *sregs)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 	int ret;
@@ -260,21 +262,22 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
-int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
-			union kvmppc_one_reg *val)
+static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
+			      union kvmppc_one_reg *val)
 {
 	int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
 	return r;
 }
 
-int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
-		       union kvmppc_one_reg *val)
+static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
+			      union kvmppc_one_reg *val)
 {
 	int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
 	return r;
 }
 
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm,
+						       unsigned int id)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500;
 	struct kvm_vcpu *vcpu;
@@ -315,7 +318,7 @@ out:
 	return ERR_PTR(err);
 }
 
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 
@@ -325,7 +328,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 	kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
 }
 
-int kvmppc_core_init_vm(struct kvm *kvm)
+static int kvmppc_core_init_vm_e500mc(struct kvm *kvm)
 {
 	int lpid;
 
@@ -337,23 +340,44 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 	return 0;
 }
 
-void kvmppc_core_destroy_vm(struct kvm *kvm)
+static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm)
 {
 	kvmppc_free_lpid(kvm->arch.lpid);
 }
 
+static struct kvmppc_ops kvm_ops_e500mc = {
+	.get_sregs = kvmppc_core_get_sregs_e500mc,
+	.set_sregs = kvmppc_core_set_sregs_e500mc,
+	.get_one_reg = kvmppc_get_one_reg_e500mc,
+	.set_one_reg = kvmppc_set_one_reg_e500mc,
+	.vcpu_load   = kvmppc_core_vcpu_load_e500mc,
+	.vcpu_put    = kvmppc_core_vcpu_put_e500mc,
+	.vcpu_create = kvmppc_core_vcpu_create_e500mc,
+	.vcpu_free   = kvmppc_core_vcpu_free_e500mc,
+	.mmu_destroy  = kvmppc_mmu_destroy_e500,
+	.init_vm = kvmppc_core_init_vm_e500mc,
+	.destroy_vm = kvmppc_core_destroy_vm_e500mc,
+	.emulate_op = kvmppc_core_emulate_op_e500,
+	.emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
+	.emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
+};
+
 static int __init kvmppc_e500mc_init(void)
 {
 	int r;
 
 	r = kvmppc_booke_init();
 	if (r)
-		return r;
+		goto err_out;
 
 	kvmppc_init_lpid(64);
 	kvmppc_claim_lpid(0); /* host */
 
-	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+	r = kvm_init(&kvm_ops_e500mc, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+	if (r)
+		goto err_out;
+err_out:
+	return r;
 }
 
 static void __exit kvmppc_e500mc_exit(void)
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 751cd45f65a0..81762eb7957e 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -130,7 +130,7 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 	case SPRN_PIR: break;
 
 	default:
-		emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
+		emulated = kvmppc_ops->emulate_mtspr(vcpu, sprn,
 						     spr_val);
 		if (emulated == EMULATE_FAIL)
 			printk(KERN_INFO "mtspr: unknown spr "
@@ -191,7 +191,7 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 		spr_val = kvmppc_get_dec(vcpu, get_tb());
 		break;
 	default:
-		emulated = kvmppc_core_emulate_mfspr(vcpu, sprn,
+		emulated = kvmppc_ops->emulate_mfspr(vcpu, sprn,
 						     &spr_val);
 		if (unlikely(emulated == EMULATE_FAIL)) {
 			printk(KERN_INFO "mfspr: unknown spr "
@@ -464,7 +464,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	}
 
 	if (emulated == EMULATE_FAIL) {
-		emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance);
+		emulated = kvmppc_ops->emulate_op(run, vcpu, inst, &advance);
 		if (emulated == EMULATE_AGAIN) {
 			advance = 0;
 		} else if (emulated == EMULATE_FAIL) {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 07c0106fab76..69b930550d2e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -39,6 +39,8 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
+struct kvmppc_ops *kvmppc_ops;
+
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
 	return !!(v->arch.pending_exceptions) ||
@@ -1024,52 +1026,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
 		goto out;
 	}
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-#ifdef CONFIG_KVM_BOOK3S_64_HV
-	case KVM_ALLOCATE_RMA: {
-		struct kvm_allocate_rma rma;
-		struct kvm *kvm = filp->private_data;
-
-		r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
-		if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
-			r = -EFAULT;
-		break;
-	}
-
-	case KVM_PPC_ALLOCATE_HTAB: {
-		u32 htab_order;
-
-		r = -EFAULT;
-		if (get_user(htab_order, (u32 __user *)argp))
-			break;
-		r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
-		if (r)
-			break;
-		r = -EFAULT;
-		if (put_user(htab_order, (u32 __user *)argp))
-			break;
-		r = 0;
-		break;
-	}
-
-	case KVM_PPC_GET_HTAB_FD: {
-		struct kvm_get_htab_fd ghf;
-
-		r = -EFAULT;
-		if (copy_from_user(&ghf, argp, sizeof(ghf)))
-			break;
-		r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
-		break;
-	}
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
-
-#ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_PPC_GET_SMMU_INFO: {
 		struct kvm_ppc_smmu_info info;
 
 		memset(&info, 0, sizeof(info));
-		r = kvm_vm_ioctl_get_smmu_info(kvm, &info);
+		r = kvmppc_ops->get_smmu_info(kvm, &info);
 		if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
 			r = -EFAULT;
 		break;
@@ -1080,11 +1041,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
 		break;
 	}
-#endif /* CONFIG_PPC_BOOK3S_64 */
+	default:
+		r = kvmppc_ops->arch_vm_ioctl(filp, ioctl, arg);
+
+#else /* CONFIG_PPC_BOOK3S_64 */
 	default:
 		r = -ENOTTY;
+#endif
 	}
-
 out:
 	return r;
 }
@@ -1125,9 +1089,15 @@ void kvmppc_init_lpid(unsigned long nr_lpids_param)
 
 int kvm_arch_init(void *opaque)
 {
+	if (kvmppc_ops) {
+		printk(KERN_ERR "kvm: already loaded the other module\n");
+		return -EEXIST;
+	}
+	kvmppc_ops = (struct kvmppc_ops *)opaque;
 	return 0;
 }
 
 void kvm_arch_exit(void)
 {
+	kvmppc_ops = NULL;
 }
-- 
cgit v1.2.3


From dd96b2c2dc408faf2213bc0a05897c1359f7969c Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 22:17:55 +0530
Subject: kvm: powerpc: book3s: Cleanup interrupt handling code

With this patch, if HV is included, interrupts come in to the HV version
of the kvmppc_interrupt code, which then jumps to the PR handler
(renamed to kvmppc_interrupt_pr) if the guest is a PR guest. This helps
in enabling both HV and PR, which we do in a later patch.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/exception-64s.h | 11 +++++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  |  9 +++++++--
 arch/powerpc/kvm/book3s_segment.S        |  4 ++--
 3 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index a22c9854a633..894662a5d4d5 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -198,6 +198,17 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 	cmpwi	r10,0;							\
 	bne	do_kvm_##n
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * If hv is possible, interrupts come in to the hv version
+ * of the kvmppc_interrupt code, which then jumps to the PR handler,
+ * kvmppc_interrupt_pr, if the guest is a PR guest.
+ */
+#define kvmppc_interrupt kvmppc_interrupt_hv
+#else
+#define kvmppc_interrupt kvmppc_interrupt_pr
+#endif
+
 #define __KVM_HANDLER(area, h, n)					\
 do_kvm_##n:								\
 	BEGIN_FTR_SECTION_NESTED(947)					\
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 84105eb18a0e..f7e24c6cb8eb 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -742,8 +742,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 /*
  * We come here from the first-level interrupt handlers.
  */
-	.globl	kvmppc_interrupt
-kvmppc_interrupt:
+	.globl	kvmppc_interrupt_hv
+kvmppc_interrupt_hv:
 	/*
 	 * Register contents:
 	 * R12		= interrupt vector
@@ -757,6 +757,11 @@ kvmppc_interrupt:
 	lbz	r9, HSTATE_IN_GUEST(r13)
 	cmpwi	r9, KVM_GUEST_MODE_HOST_HV
 	beq	kvmppc_bad_host_intr
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	cmpwi	r9, KVM_GUEST_MODE_GUEST
+	ld	r9, HSTATE_HOST_R2(r13)
+	beq	kvmppc_interrupt_pr
+#endif
 	/* We're now back in the host but in guest MMU context */
 	li	r9, KVM_GUEST_MODE_HOST_HV
 	stb	r9, HSTATE_IN_GUEST(r13)
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 1abe4788191a..bc50c97751d3 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -161,8 +161,8 @@ kvmppc_handler_trampoline_enter_end:
 .global kvmppc_handler_trampoline_exit
 kvmppc_handler_trampoline_exit:
 
-.global kvmppc_interrupt
-kvmppc_interrupt:
+.global kvmppc_interrupt_pr
+kvmppc_interrupt_pr:
 
 	/* Register usage at this point:
 	 *
-- 
cgit v1.2.3


From 699cc87641c123128bf3a4e12c0a8d739b1ac2f3 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 22:17:56 +0530
Subject: kvm: powerpc: book3s: Add is_hv_enabled to kvmppc_ops

This helps us identify whether we are running with hypervisor mode KVM
enabled. The change is needed so that we can have both HV and PR KVM
enabled in the same kernel.

If both HV and PR KVM are included, interrupts come in to the HV version
of the kvmppc_interrupt code, which then jumps to the PR handler,
renamed to kvmppc_interrupt_pr, if the guest is a PR guest.

Allowing both PR and HV in the same kernel required some changes to
kvm_dev_ioctl_check_extension(), since the values returned now can't
be selected with #ifdefs as much as previously. We look at is_hv_enabled
to return the right value when checking for capabilities. For capabilities
that are only provided by HV KVM, we return the HV value only if
is_hv_enabled is true. For capabilities provided by PR KVM but not HV,
we return the PR value only if is_hv_enabled is false.

NOTE: in a later patch we replace is_hv_enabled with a static inline
function comparing kvm_ppc_ops.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s.h | 53 ----------------------------------
 arch/powerpc/include/asm/kvm_ppc.h    |  5 ++--
 arch/powerpc/kvm/book3s.c             | 44 ++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv.c          |  1 +
 arch/powerpc/kvm/book3s_pr.c          |  1 +
 arch/powerpc/kvm/book3s_xics.c        |  2 +-
 arch/powerpc/kvm/powerpc.c            | 54 +++++++++++++++++++----------------
 7 files changed, 79 insertions(+), 81 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 315a5d692417..4a594b76674d 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -301,59 +301,6 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
 	return vcpu->arch.fault_dar;
 }
 
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-
-static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
-{
-	return to_book3s(vcpu)->hior;
-}
-
-static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
-			unsigned long pending_now, unsigned long old_pending)
-{
-	if (pending_now)
-		vcpu->arch.shared->int_pending = 1;
-	else if (old_pending)
-		vcpu->arch.shared->int_pending = 0;
-}
-
-static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
-{
-	ulong crit_raw = vcpu->arch.shared->critical;
-	ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
-	bool crit;
-
-	/* Truncate crit indicators in 32 bit mode */
-	if (!(vcpu->arch.shared->msr & MSR_SF)) {
-		crit_raw &= 0xffffffff;
-		crit_r1 &= 0xffffffff;
-	}
-
-	/* Critical section when crit == r1 */
-	crit = (crit_raw == crit_r1);
-	/* ... and we're in supervisor mode */
-	crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
-
-	return crit;
-}
-#else /* CONFIG_KVM_BOOK3S_PR_POSSIBLE */
-
-static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
-			unsigned long pending_now, unsigned long old_pending)
-{
-}
-
-static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
-{
-	return false;
-}
-#endif
-
 /* Magic register values loaded into r3 and r4 before the 'sc' assembly
  * instruction for the OSI hypercalls */
 #define OSI_SC_MAGIC_R3			0x113724FA
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 326033c99385..c13f15db476c 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -180,6 +180,7 @@ union kvmppc_one_reg {
 };
 
 struct kvmppc_ops {
+	bool is_hv_enabled;
 	int (*get_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 	int (*set_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 	int (*get_one_reg)(struct kvm_vcpu *vcpu, u64 id,
@@ -309,10 +310,10 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 
 static inline u32 kvmppc_get_xics_latch(void)
 {
-	u32 xirr = get_paca()->kvm_hstate.saved_xirr;
+	u32 xirr;
 
+	xirr = get_paca()->kvm_hstate.saved_xirr;
 	get_paca()->kvm_hstate.saved_xirr = 0;
-
 	return xirr;
 }
 
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 784a1d5ccb04..493aff77f84e 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -69,6 +69,50 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 {
 }
 
+static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
+{
+	if (!kvmppc_ops->is_hv_enabled)
+		return to_book3s(vcpu)->hior;
+	return 0;
+}
+
+static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
+			unsigned long pending_now, unsigned long old_pending)
+{
+	if (kvmppc_ops->is_hv_enabled)
+		return;
+	if (pending_now)
+		vcpu->arch.shared->int_pending = 1;
+	else if (old_pending)
+		vcpu->arch.shared->int_pending = 0;
+}
+
+static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
+{
+	ulong crit_raw;
+	ulong crit_r1;
+	bool crit;
+
+	if (kvmppc_ops->is_hv_enabled)
+		return false;
+
+	crit_raw = vcpu->arch.shared->critical;
+	crit_r1 = kvmppc_get_gpr(vcpu, 1);
+
+	/* Truncate crit indicators in 32 bit mode */
+	if (!(vcpu->arch.shared->msr & MSR_SF)) {
+		crit_raw &= 0xffffffff;
+		crit_r1 &= 0xffffffff;
+	}
+
+	/* Critical section when crit == r1 */
+	crit = (crit_raw == crit_r1);
+	/* ... and we're in supervisor mode */
+	crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
+
+	return crit;
+}
+
 void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
 {
 	vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3d7079a08d2a..0782c8688c8b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2159,6 +2159,7 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
 }
 
 static struct kvmppc_ops kvmppc_hv_ops = {
+	.is_hv_enabled = true,
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
 	.get_one_reg = kvmppc_get_one_reg_hv,
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 05d28cf4a06c..b6a525d7b5c3 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1523,6 +1523,7 @@ static long kvm_arch_vm_ioctl_pr(struct file *filp,
 }
 
 static struct kvmppc_ops kvmppc_pr_ops = {
+	.is_hv_enabled = false,
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
 	.get_one_reg = kvmppc_get_one_reg_pr,
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 51ed1ea440e0..cef3de96ff6c 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -818,7 +818,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
 	}
 
 	/* Check for real mode returning too hard */
-	if (xics->real_mode)
+	if (xics->real_mode && kvmppc_ops->is_hv_enabled)
 		return kvmppc_xics_rm_complete(vcpu, req);
 
 	switch (req) {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 69b930550d2e..874a2a5e6cec 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -52,7 +52,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-#ifndef CONFIG_KVM_BOOK3S_64_HV
 /*
  * Common checks before entering the guest world.  Call with interrupts
  * disabled.
@@ -127,7 +126,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 
 	return r;
 }
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
 
 int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 {
@@ -194,11 +192,9 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
 	if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled)
 		goto out;
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
 	/* HV KVM can only do PAPR mode for now */
-	if (!vcpu->arch.papr_enabled)
+	if (!vcpu->arch.papr_enabled && kvmppc_ops->is_hv_enabled)
 		goto out;
-#endif
 
 #ifdef CONFIG_KVM_BOOKE_HV
 	if (!cpu_has_feature(CPU_FTR_EMB_HV))
@@ -322,22 +318,26 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_DEVICE_CTRL:
 		r = 1;
 		break;
-#ifndef CONFIG_KVM_BOOK3S_64_HV
 	case KVM_CAP_PPC_PAIRED_SINGLES:
 	case KVM_CAP_PPC_OSI:
 	case KVM_CAP_PPC_GET_PVINFO:
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
 	case KVM_CAP_SW_TLB:
 #endif
-#ifdef CONFIG_KVM_MPIC
-	case KVM_CAP_IRQ_MPIC:
-#endif
-		r = 1;
+		/* We support this only for PR */
+		r = !kvmppc_ops->is_hv_enabled;
 		break;
+#ifdef CONFIG_KVM_MMIO
 	case KVM_CAP_COALESCED_MMIO:
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
 #endif
+#ifdef CONFIG_KVM_MPIC
+	case KVM_CAP_IRQ_MPIC:
+		r = 1;
+		break;
+#endif
+
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CAP_SPAPR_TCE:
 	case KVM_CAP_PPC_ALLOC_HTAB:
@@ -348,32 +348,37 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = 1;
 		break;
 #endif /* CONFIG_PPC_BOOK3S_64 */
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	case KVM_CAP_PPC_SMT:
-		r = threads_per_core;
+		if (kvmppc_ops->is_hv_enabled)
+			r = threads_per_core;
+		else
+			r = 0;
 		break;
 	case KVM_CAP_PPC_RMA:
-		r = 1;
+		r = kvmppc_ops->is_hv_enabled;
 		/* PPC970 requires an RMA */
-		if (cpu_has_feature(CPU_FTR_ARCH_201))
+		if (r && cpu_has_feature(CPU_FTR_ARCH_201))
 			r = 2;
 		break;
 #endif
 	case KVM_CAP_SYNC_MMU:
-#ifdef CONFIG_KVM_BOOK3S_64_HV
-		r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+		if (kvmppc_ops->is_hv_enabled)
+			r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
+		else
+			r = 0;
 #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 		r = 1;
 #else
 		r = 0;
-		break;
 #endif
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+		break;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	case KVM_CAP_PPC_HTAB_FD:
-		r = 1;
+		r = kvmppc_ops->is_hv_enabled;
 		break;
 #endif
-		break;
 	case KVM_CAP_NR_VCPUS:
 		/*
 		 * Recommending a number of CPUs is somewhat arbitrary; we
@@ -381,11 +386,10 @@ int kvm_dev_ioctl_check_extension(long ext)
 		 * will have secondary threads "offline"), and for other KVM
 		 * implementations just count online CPUs.
 		 */
-#ifdef CONFIG_KVM_BOOK3S_64_HV
-		r = num_present_cpus();
-#else
-		r = num_online_cpus();
-#endif
+		if (kvmppc_ops->is_hv_enabled)
+			r = num_present_cpus();
+		else
+			r = num_online_cpus();
 		break;
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
-- 
cgit v1.2.3


From 5587027ce9d59a57aecaa190be1c8e560aaff45d Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 22:18:00 +0530
Subject: kvm: Add struct kvm arg to memslot APIs

We will use that in a later patch to find the kvm ops handler.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/arm/kvm/arm.c                 |  5 +++--
 arch/ia64/kvm/kvm-ia64.c           |  5 +++--
 arch/mips/kvm/kvm_mips.c           |  5 +++--
 arch/powerpc/include/asm/kvm_ppc.h |  6 ++++--
 arch/powerpc/kvm/book3s.c          |  4 ++--
 arch/powerpc/kvm/booke.c           |  4 ++--
 arch/powerpc/kvm/powerpc.c         |  9 +++++----
 arch/s390/kvm/kvm-s390.c           |  5 +++--
 arch/x86/kvm/x86.c                 |  5 +++--
 include/linux/kvm_host.h           |  5 +++--
 virt/kvm/kvm_main.c                | 12 ++++++------
 11 files changed, 37 insertions(+), 28 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index cc5adb9349ef..e312e4a53f8d 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -152,12 +152,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index bdfd8789b376..985bf80c622e 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1550,12 +1550,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c
index a7b044536de4..73b34827826c 100644
--- a/arch/mips/kvm/kvm_mips.c
+++ b/arch/mips/kvm/kvm_mips.c
@@ -198,12 +198,13 @@ kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 	return -ENOIOCTLCMD;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index c13f15db476c..20f461637090 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -134,9 +134,11 @@ extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
 extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
 extern void kvmppc_core_destroy_vm(struct kvm *kvm);
-extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+extern void kvmppc_core_free_memslot(struct kvm *kvm,
+				     struct kvm_memory_slot *free,
 				     struct kvm_memory_slot *dont);
-extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+extern int kvmppc_core_create_memslot(struct kvm *kvm,
+				      struct kvm_memory_slot *slot,
 				      unsigned long npages);
 extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 39d2994f9d27..130fe1d75bac 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -761,13 +761,13 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 	return kvmppc_ops->get_dirty_log(kvm, log);
 }
 
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			      struct kvm_memory_slot *dont)
 {
 	kvmppc_ops->free_memslot(free, dont);
 }
 
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			       unsigned long npages)
 {
 	return kvmppc_ops->create_memslot(slot, npages);
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1769354d0bf8..cb2d986a3382 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1662,12 +1662,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 	return -ENOTSUPP;
 }
 
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			      struct kvm_memory_slot *dont)
 {
 }
 
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			       unsigned long npages)
 {
 	return 0;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 9aaa07efa4b6..b103d747934a 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -417,15 +417,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
-	kvmppc_core_free_memslot(free, dont);
+	kvmppc_core_free_memslot(kvm, free, dont);
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
-	return kvmppc_core_create_memslot(slot, npages);
+	return kvmppc_core_create_memslot(kvm, slot, npages);
 }
 
 void kvm_arch_memslots_updated(struct kvm *kvm)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 1e4e7b97337a..bedda67cc222 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1089,12 +1089,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index edf2a07df3a3..666526a55c46 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7080,7 +7080,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 	int i;
@@ -7101,7 +7101,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 	}
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	int i;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c9d4236ab442..8b0107dc2067 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -507,9 +507,10 @@ int kvm_set_memory_region(struct kvm *kvm,
 			  struct kvm_userspace_memory_region *mem);
 int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem);
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont);
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages);
 void kvm_arch_memslots_updated(struct kvm *kvm);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 94c6e3f6f244..0932c3b64155 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -542,13 +542,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 /*
  * Free any memory in @free but not in @dont.
  */
-static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
+static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
 				  struct kvm_memory_slot *dont)
 {
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		kvm_destroy_dirty_bitmap(free);
 
-	kvm_arch_free_memslot(free, dont);
+	kvm_arch_free_memslot(kvm, free, dont);
 
 	free->npages = 0;
 }
@@ -559,7 +559,7 @@ void kvm_free_physmem(struct kvm *kvm)
 	struct kvm_memory_slot *memslot;
 
 	kvm_for_each_memslot(memslot, slots)
-		kvm_free_physmem_slot(memslot, NULL);
+		kvm_free_physmem_slot(kvm, memslot, NULL);
 
 	kfree(kvm->memslots);
 }
@@ -823,7 +823,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if (change == KVM_MR_CREATE) {
 		new.userspace_addr = mem->userspace_addr;
 
-		if (kvm_arch_create_memslot(&new, npages))
+		if (kvm_arch_create_memslot(kvm, &new, npages))
 			goto out_free;
 	}
 
@@ -899,7 +899,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
 	kvm_arch_commit_memory_region(kvm, mem, &old, change);
 
-	kvm_free_physmem_slot(&old, &new);
+	kvm_free_physmem_slot(kvm, &old, &new);
 	kfree(old_memslots);
 
 	return 0;
@@ -907,7 +907,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 out_slots:
 	kfree(slots);
 out_free:
-	kvm_free_physmem_slot(&new, &old);
+	kvm_free_physmem_slot(kvm, &new, &old);
 out:
 	return r;
 }
-- 
cgit v1.2.3


From cbbc58d4fdfab1a39a6ac1b41fcb17885952157a Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 22:18:01 +0530
Subject: kvm: powerpc: book3s: Allow the HV and PR selection per virtual
 machine

This moves the kvmppc_ops callbacks to be a per VM entity. This
enables us to select HV and PR mode when creating a VM. We also
allow both the kvm-hv and kvm-pr kernel modules to be loaded. To
achieve this we move /dev/kvm ownership to the kvm.ko module. Depending
on which KVM mode we select during VM creation, we take a reference
count on the respective module.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
[agraf: fix coding style]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_host.h |  1 +
 arch/powerpc/include/asm/kvm_ppc.h  |  7 +--
 arch/powerpc/kvm/44x.c              |  7 ++-
 arch/powerpc/kvm/book3s.c           | 89 +++++++++++++++++++++++++------------
 arch/powerpc/kvm/book3s.h           |  2 +
 arch/powerpc/kvm/book3s_hv.c        | 18 ++++----
 arch/powerpc/kvm/book3s_pr.c        | 25 +++++++----
 arch/powerpc/kvm/book3s_xics.c      |  2 +-
 arch/powerpc/kvm/booke.c            | 22 ++++-----
 arch/powerpc/kvm/e500.c             |  8 +++-
 arch/powerpc/kvm/e500mc.c           |  6 ++-
 arch/powerpc/kvm/emulate.c          | 11 ++---
 arch/powerpc/kvm/powerpc.c          | 76 ++++++++++++++++++++++---------
 include/uapi/linux/kvm.h            |  4 ++
 14 files changed, 187 insertions(+), 91 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 61ce4dca45d3..237d1d25b448 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -270,6 +270,7 @@ struct kvm_arch {
 #ifdef CONFIG_KVM_XICS
 	struct kvmppc_xics *xics;
 #endif
+	struct kvmppc_ops *kvm_ops;
 };
 
 /*
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 20f461637090..3069cf4dcc88 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -182,6 +182,7 @@ union kvmppc_one_reg {
 };
 
 struct kvmppc_ops {
+	struct module *owner;
 	bool is_hv_enabled;
 	int (*get_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 	int (*set_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
@@ -217,7 +218,6 @@ struct kvmppc_ops {
 			      unsigned long npages);
 	int (*init_vm)(struct kvm *kvm);
 	void (*destroy_vm)(struct kvm *kvm);
-	int (*check_processor_compat)(void);
 	int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info);
 	int (*emulate_op)(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			  unsigned int inst, int *advance);
@@ -229,7 +229,8 @@ struct kvmppc_ops {
 
 };
 
-extern struct kvmppc_ops *kvmppc_ops;
+extern struct kvmppc_ops *kvmppc_hv_ops;
+extern struct kvmppc_ops *kvmppc_pr_ops;
 
 /*
  * Cuts out inst bits with ordering according to spec.
@@ -326,7 +327,7 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
 
 static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-	kvmppc_ops->fast_vcpu_kick(vcpu);
+	vcpu->kvm->arch.kvm_ops->fast_vcpu_kick(vcpu);
 }
 
 #else
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index a765bcd74fbb..93221e87b911 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -213,16 +213,19 @@ static int __init kvmppc_44x_init(void)
 	if (r)
 		goto err_out;
 
-	r = kvm_init(&kvm_ops_44x, sizeof(struct kvmppc_vcpu_44x),
-		     0, THIS_MODULE);
+	r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE);
 	if (r)
 		goto err_out;
+	kvm_ops_44x.owner = THIS_MODULE;
+	kvmppc_pr_ops = &kvm_ops_44x;
+
 err_out:
 	return r;
 }
 
 static void __exit kvmppc_44x_exit(void)
 {
+	kvmppc_pr_ops = NULL;
 	kvmppc_booke_exit();
 }
 
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 130fe1d75bac..ad8f6ed3f136 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -34,6 +34,7 @@
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 
+#include "book3s.h"
 #include "trace.h"
 
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
@@ -71,7 +72,7 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 
 static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
 {
-	if (!kvmppc_ops->is_hv_enabled)
+	if (!vcpu->kvm->arch.kvm_ops->is_hv_enabled)
 		return to_book3s(vcpu)->hior;
 	return 0;
 }
@@ -79,7 +80,7 @@ static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
 static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
 			unsigned long pending_now, unsigned long old_pending)
 {
-	if (kvmppc_ops->is_hv_enabled)
+	if (vcpu->kvm->arch.kvm_ops->is_hv_enabled)
 		return;
 	if (pending_now)
 		vcpu->arch.shared->int_pending = 1;
@@ -93,7 +94,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
 	ulong crit_r1;
 	bool crit;
 
-	if (kvmppc_ops->is_hv_enabled)
+	if (vcpu->kvm->arch.kvm_ops->is_hv_enabled)
 		return false;
 
 	crit_raw = vcpu->arch.shared->critical;
@@ -477,13 +478,13 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 				  struct kvm_sregs *sregs)
 {
-	return kvmppc_ops->get_sregs(vcpu, sregs);
+	return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
 }
 
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 				  struct kvm_sregs *sregs)
 {
-	return kvmppc_ops->set_sregs(vcpu, sregs);
+	return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
 }
 
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -562,7 +563,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 	if (size > sizeof(val))
 		return -EINVAL;
 
-	r = kvmppc_ops->get_one_reg(vcpu, reg->id, &val);
+	r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
 	if (r == -EINVAL) {
 		r = 0;
 		switch (reg->id) {
@@ -641,7 +642,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 	if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
 		return -EFAULT;
 
-	r = kvmppc_ops->set_one_reg(vcpu, reg->id, &val);
+	r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
 	if (r == -EINVAL) {
 		r = 0;
 		switch (reg->id) {
@@ -702,23 +703,23 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	kvmppc_ops->vcpu_load(vcpu, cpu);
+	vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu);
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	kvmppc_ops->vcpu_put(vcpu);
+	vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);
 }
 
 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
 {
-	kvmppc_ops->set_msr(vcpu, msr);
+	vcpu->kvm->arch.kvm_ops->set_msr(vcpu, msr);
 }
 EXPORT_SYMBOL_GPL(kvmppc_set_msr);
 
 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
-	return kvmppc_ops->vcpu_run(kvm_run, vcpu);
+	return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu);
 }
 
 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
@@ -743,84 +744,84 @@ void kvmppc_decrementer_func(unsigned long data)
 
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
-	return kvmppc_ops->vcpu_create(kvm, id);
+	return kvm->arch.kvm_ops->vcpu_create(kvm, id);
 }
 
 void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 {
-	kvmppc_ops->vcpu_free(vcpu);
+	vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
 }
 
 int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 {
-	return kvmppc_ops->check_requests(vcpu);
+	return vcpu->kvm->arch.kvm_ops->check_requests(vcpu);
 }
 
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 {
-	return kvmppc_ops->get_dirty_log(kvm, log);
+	return kvm->arch.kvm_ops->get_dirty_log(kvm, log);
 }
 
 void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			      struct kvm_memory_slot *dont)
 {
-	kvmppc_ops->free_memslot(free, dont);
+	kvm->arch.kvm_ops->free_memslot(free, dont);
 }
 
 int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			       unsigned long npages)
 {
-	return kvmppc_ops->create_memslot(slot, npages);
+	return kvm->arch.kvm_ops->create_memslot(slot, npages);
 }
 
 void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
 {
-	kvmppc_ops->flush_memslot(kvm, memslot);
+	kvm->arch.kvm_ops->flush_memslot(kvm, memslot);
 }
 
 int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
 				struct kvm_userspace_memory_region *mem)
 {
-	return kvmppc_ops->prepare_memory_region(kvm, memslot, mem);
+	return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem);
 }
 
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
 				const struct kvm_memory_slot *old)
 {
-	kvmppc_ops->commit_memory_region(kvm, mem, old);
+	kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old);
 }
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
-	return kvmppc_ops->unmap_hva(kvm, hva);
+	return kvm->arch.kvm_ops->unmap_hva(kvm, hva);
 }
 EXPORT_SYMBOL_GPL(kvm_unmap_hva);
 
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	return kvmppc_ops->unmap_hva_range(kvm, start, end);
+	return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
 }
 
 int kvm_age_hva(struct kvm *kvm, unsigned long hva)
 {
-	return kvmppc_ops->age_hva(kvm, hva);
+	return kvm->arch.kvm_ops->age_hva(kvm, hva);
 }
 
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
 {
-	return kvmppc_ops->test_age_hva(kvm, hva);
+	return kvm->arch.kvm_ops->test_age_hva(kvm, hva);
 }
 
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
-	kvmppc_ops->set_spte_hva(kvm, hva, pte);
+	kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte);
 }
 
 void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
 {
-	kvmppc_ops->mmu_destroy(vcpu);
+	vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
 }
 
 int kvmppc_core_init_vm(struct kvm *kvm)
@@ -831,12 +832,12 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
 #endif
 
-	return kvmppc_ops->init_vm(kvm);
+	return kvm->arch.kvm_ops->init_vm(kvm);
 }
 
 void kvmppc_core_destroy_vm(struct kvm *kvm)
 {
-	kvmppc_ops->destroy_vm(kvm);
+	kvm->arch.kvm_ops->destroy_vm(kvm);
 
 #ifdef CONFIG_PPC64
 	kvmppc_rtas_tokens_free(kvm);
@@ -846,5 +847,35 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
 
 int kvmppc_core_check_processor_compat(void)
 {
-	return kvmppc_ops->check_processor_compat();
+	/*
+	 * We always return 0 for book3s. We check
+	 * for compatibility while loading the HV
+	 * or PR module
+	 */
+	return 0;
+}
+
+static int kvmppc_book3s_init(void)
+{
+	int r;
+
+	r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+	if (r)
+		return r;
+#ifdef CONFIG_KVM_BOOK3S_32
+	r = kvmppc_book3s_init_pr();
+#endif
+	return r;
+
+}
+
+static void kvmppc_book3s_exit(void)
+{
+#ifdef CONFIG_KVM_BOOK3S_32
+	kvmppc_book3s_exit_pr();
+#endif
+	kvm_exit();
 }
+
+module_init(kvmppc_book3s_init);
+module_exit(kvmppc_book3s_exit);
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 9e5b3a341943..4bf956cf94d6 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -28,5 +28,7 @@ extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu,
 					int sprn, ulong spr_val);
 extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu,
 					int sprn, ulong *spr_val);
+extern int kvmppc_book3s_init_pr(void);
+extern void kvmppc_book3s_exit_pr(void);
 
 #endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 9e954a81c078..8743048881b7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2159,7 +2159,7 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
 	return r;
 }
 
-static struct kvmppc_ops kvmppc_hv_ops = {
+static struct kvmppc_ops kvm_ops_hv = {
 	.is_hv_enabled = true,
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -2186,7 +2186,6 @@ static struct kvmppc_ops kvmppc_hv_ops = {
 	.create_memslot = kvmppc_core_create_memslot_hv,
 	.init_vm =  kvmppc_core_init_vm_hv,
 	.destroy_vm = kvmppc_core_destroy_vm_hv,
-	.check_processor_compat = kvmppc_core_check_processor_compat_hv,
 	.get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv,
 	.emulate_op = kvmppc_core_emulate_op_hv,
 	.emulate_mtspr = kvmppc_core_emulate_mtspr_hv,
@@ -2198,20 +2197,23 @@ static struct kvmppc_ops kvmppc_hv_ops = {
 static int kvmppc_book3s_init_hv(void)
 {
 	int r;
-
-	r = kvm_init(&kvmppc_hv_ops, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
-
-	if (r)
+	/*
+	 * FIXME!! Do we need to check on all cpus ?
+	 */
+	r = kvmppc_core_check_processor_compat_hv();
+	if (r < 0)
 		return r;
 
-	r = kvmppc_mmu_hv_init();
+	kvm_ops_hv.owner = THIS_MODULE;
+	kvmppc_hv_ops = &kvm_ops_hv;
 
+	r = kvmppc_mmu_hv_init();
 	return r;
 }
 
 static void kvmppc_book3s_exit_hv(void)
 {
-	kvm_exit();
+	kvmppc_hv_ops = NULL;
 }
 
 module_init(kvmppc_book3s_init_hv);
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 7f583a482161..fbd985f0cb02 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1525,7 +1525,7 @@ static long kvm_arch_vm_ioctl_pr(struct file *filp,
 	return -ENOTTY;
 }
 
-static struct kvmppc_ops kvmppc_pr_ops = {
+static struct kvmppc_ops kvm_ops_pr = {
 	.is_hv_enabled = false,
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
@@ -1552,7 +1552,6 @@ static struct kvmppc_ops kvmppc_pr_ops = {
 	.create_memslot = kvmppc_core_create_memslot_pr,
 	.init_vm = kvmppc_core_init_vm_pr,
 	.destroy_vm = kvmppc_core_destroy_vm_pr,
-	.check_processor_compat = kvmppc_core_check_processor_compat_pr,
 	.get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
 	.emulate_op = kvmppc_core_emulate_op_pr,
 	.emulate_mtspr = kvmppc_core_emulate_mtspr_pr,
@@ -1561,27 +1560,35 @@ static struct kvmppc_ops kvmppc_pr_ops = {
 	.arch_vm_ioctl  = kvm_arch_vm_ioctl_pr,
 };
 
-static int kvmppc_book3s_init_pr(void)
+
+int kvmppc_book3s_init_pr(void)
 {
 	int r;
 
-	r = kvm_init(&kvmppc_pr_ops, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
-
-	if (r)
+	r = kvmppc_core_check_processor_compat_pr();
+	if (r < 0)
 		return r;
 
-	r = kvmppc_mmu_hpte_sysinit();
+	kvm_ops_pr.owner = THIS_MODULE;
+	kvmppc_pr_ops = &kvm_ops_pr;
 
+	r = kvmppc_mmu_hpte_sysinit();
 	return r;
 }
 
-static void kvmppc_book3s_exit_pr(void)
+void kvmppc_book3s_exit_pr(void)
 {
+	kvmppc_pr_ops = NULL;
 	kvmppc_mmu_hpte_sysexit();
-	kvm_exit();
 }
 
+/*
+ * We only support separate modules for book3s 64
+ */
+#ifdef CONFIG_PPC_BOOK3S_64
+
 module_init(kvmppc_book3s_init_pr);
 module_exit(kvmppc_book3s_exit_pr);
 
 MODULE_LICENSE("GPL");
+#endif
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index c3c832b27ee5..f7a5108a3483 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -818,7 +818,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
 	}
 
 	/* Check for real mode returning too hard */
-	if (xics->real_mode && kvmppc_ops->is_hv_enabled)
+	if (xics->real_mode && vcpu->kvm->arch.kvm_ops->is_hv_enabled)
 		return kvmppc_xics_rm_complete(vcpu, req);
 
 	switch (req) {
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index cb2d986a3382..15d0149511eb 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1472,7 +1472,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 
 	get_sregs_base(vcpu, sregs);
 	get_sregs_arch206(vcpu, sregs);
-	return kvmppc_ops->get_sregs(vcpu, sregs);
+	return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
 }
 
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
@@ -1491,7 +1491,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	if (ret < 0)
 		return ret;
 
-	return kvmppc_ops->set_sregs(vcpu, sregs);
+	return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
 }
 
 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
@@ -1548,7 +1548,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 		val = get_reg_val(reg->id, vcpu->arch.vrsave);
 		break;
 	default:
-		r = kvmppc_ops->get_one_reg(vcpu, reg->id, &val);
+		r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
 		break;
 	}
 
@@ -1631,7 +1631,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 		vcpu->arch.vrsave = set_reg_val(reg->id, val);
 		break;
 	default:
-		r = kvmppc_ops->set_one_reg(vcpu, reg->id, &val);
+		r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
 		break;
 	}
 
@@ -1911,37 +1911,37 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
 
 void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
 {
-	kvmppc_ops->mmu_destroy(vcpu);
+	vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
 }
 
 int kvmppc_core_init_vm(struct kvm *kvm)
 {
-	return kvmppc_ops->init_vm(kvm);
+	return kvm->arch.kvm_ops->init_vm(kvm);
 }
 
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
-	return kvmppc_ops->vcpu_create(kvm, id);
+	return kvm->arch.kvm_ops->vcpu_create(kvm, id);
 }
 
 void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 {
-	kvmppc_ops->vcpu_free(vcpu);
+	vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
 }
 
 void kvmppc_core_destroy_vm(struct kvm *kvm)
 {
-	kvmppc_ops->destroy_vm(kvm);
+	kvm->arch.kvm_ops->destroy_vm(kvm);
 }
 
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	kvmppc_ops->vcpu_load(vcpu, cpu);
+	vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu);
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	kvmppc_ops->vcpu_put(vcpu);
+	vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);
 }
 
 int __init kvmppc_booke_init(void)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index d225d5ebddcc..497b142f651c 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -555,13 +555,19 @@ static int __init kvmppc_e500_init(void)
 	flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers +
 			   ivor[max_ivor] + handler_len);
 
-	r = kvm_init(&kvm_ops_e500, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+	r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+	if (r)
+		goto err_out;
+	kvm_ops_e500.owner = THIS_MODULE;
+	kvmppc_pr_ops = &kvm_ops_e500;
+
 err_out:
 	return r;
 }
 
 static void __exit kvmppc_e500_exit(void)
 {
+	kvmppc_pr_ops = NULL;
 	kvmppc_booke_exit();
 }
 
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index db6a383401c7..4132cd2fc171 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -373,15 +373,19 @@ static int __init kvmppc_e500mc_init(void)
 	kvmppc_init_lpid(64);
 	kvmppc_claim_lpid(0); /* host */
 
-	r = kvm_init(&kvm_ops_e500mc, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+	r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
 	if (r)
 		goto err_out;
+	kvm_ops_e500mc.owner = THIS_MODULE;
+	kvmppc_pr_ops = &kvm_ops_e500mc;
+
 err_out:
 	return r;
 }
 
 static void __exit kvmppc_e500mc_exit(void)
 {
+	kvmppc_pr_ops = NULL;
 	kvmppc_booke_exit();
 }
 
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index de9a340d22ed..2f9a0873b44f 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -130,8 +130,8 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 	case SPRN_PIR: break;
 
 	default:
-		emulated = kvmppc_ops->emulate_mtspr(vcpu, sprn,
-						     spr_val);
+		emulated = vcpu->kvm->arch.kvm_ops->emulate_mtspr(vcpu, sprn,
+								  spr_val);
 		if (emulated == EMULATE_FAIL)
 			printk(KERN_INFO "mtspr: unknown spr "
 				"0x%x\n", sprn);
@@ -191,8 +191,8 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 		spr_val = kvmppc_get_dec(vcpu, get_tb());
 		break;
 	default:
-		emulated = kvmppc_ops->emulate_mfspr(vcpu, sprn,
-						     &spr_val);
+		emulated = vcpu->kvm->arch.kvm_ops->emulate_mfspr(vcpu, sprn,
+								  &spr_val);
 		if (unlikely(emulated == EMULATE_FAIL)) {
 			printk(KERN_INFO "mfspr: unknown spr "
 				"0x%x\n", sprn);
@@ -464,7 +464,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	}
 
 	if (emulated == EMULATE_FAIL) {
-		emulated = kvmppc_ops->emulate_op(run, vcpu, inst, &advance);
+		emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst,
+							       &advance);
 		if (emulated == EMULATE_AGAIN) {
 			advance = 0;
 		} else if (emulated == EMULATE_FAIL) {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index b103d747934a..0320c1721caa 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -26,6 +26,7 @@
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/file.h>
+#include <linux/module.h>
 #include <asm/cputable.h>
 #include <asm/uaccess.h>
 #include <asm/kvm_ppc.h>
@@ -39,7 +40,11 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
-struct kvmppc_ops *kvmppc_ops;
+struct kvmppc_ops *kvmppc_hv_ops;
+EXPORT_SYMBOL_GPL(kvmppc_hv_ops);
+struct kvmppc_ops *kvmppc_pr_ops;
+EXPORT_SYMBOL_GPL(kvmppc_pr_ops);
+
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
@@ -195,7 +200,7 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
 		goto out;
 
 	/* HV KVM can only do PAPR mode for now */
-	if (!vcpu->arch.papr_enabled && kvmppc_ops->is_hv_enabled)
+	if (!vcpu->arch.papr_enabled && vcpu->kvm->arch.kvm_ops->is_hv_enabled)
 		goto out;
 
 #ifdef CONFIG_KVM_BOOKE_HV
@@ -271,10 +276,35 @@ void kvm_arch_check_processor_compat(void *rtn)
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
-	if (type)
-		return -EINVAL;
-
+	struct kvmppc_ops *kvm_ops = NULL;
+	/*
+	 * if we have both HV and PR enabled, default is HV
+	 */
+	if (type == 0) {
+		if (kvmppc_hv_ops)
+			kvm_ops = kvmppc_hv_ops;
+		else
+			kvm_ops = kvmppc_pr_ops;
+		if (!kvm_ops)
+			goto err_out;
+	} else	if (type == KVM_VM_PPC_HV) {
+		if (!kvmppc_hv_ops)
+			goto err_out;
+		kvm_ops = kvmppc_hv_ops;
+	} else if (type == KVM_VM_PPC_PR) {
+		if (!kvmppc_pr_ops)
+			goto err_out;
+		kvm_ops = kvmppc_pr_ops;
+	} else
+		goto err_out;
+
+	if (kvm_ops->owner && !try_module_get(kvm_ops->owner))
+		return -ENOENT;
+
+	kvm->arch.kvm_ops = kvm_ops;
 	return kvmppc_core_init_vm(kvm);
+err_out:
+	return -EINVAL;
 }
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
@@ -294,6 +324,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvmppc_core_destroy_vm(kvm);
 
 	mutex_unlock(&kvm->lock);
+
+	/* drop the module reference */
+	module_put(kvm->arch.kvm_ops->owner);
 }
 
 void kvm_arch_sync_events(struct kvm *kvm)
@@ -303,6 +336,10 @@ void kvm_arch_sync_events(struct kvm *kvm)
 int kvm_dev_ioctl_check_extension(long ext)
 {
 	int r;
+	/* FIXME!!
+	 * Should some of this be vm ioctl ? is it possible now ?
+	 */
+	int hv_enabled = kvmppc_hv_ops ? 1 : 0;
 
 	switch (ext) {
 #ifdef CONFIG_BOOKE
@@ -329,7 +366,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_SW_TLB:
 #endif
 		/* We support this only for PR */
-		r = !kvmppc_ops->is_hv_enabled;
+		r = !hv_enabled;
 		break;
 #ifdef CONFIG_KVM_MMIO
 	case KVM_CAP_COALESCED_MMIO:
@@ -354,13 +391,13 @@ int kvm_dev_ioctl_check_extension(long ext)
 #endif /* CONFIG_PPC_BOOK3S_64 */
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	case KVM_CAP_PPC_SMT:
-		if (kvmppc_ops->is_hv_enabled)
+		if (hv_enabled)
 			r = threads_per_core;
 		else
 			r = 0;
 		break;
 	case KVM_CAP_PPC_RMA:
-		r = kvmppc_ops->is_hv_enabled;
+		r = hv_enabled;
 		/* PPC970 requires an RMA */
 		if (r && cpu_has_feature(CPU_FTR_ARCH_201))
 			r = 2;
@@ -368,7 +405,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 #endif
 	case KVM_CAP_SYNC_MMU:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-		if (kvmppc_ops->is_hv_enabled)
+		if (hv_enabled)
 			r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
 		else
 			r = 0;
@@ -380,7 +417,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		break;
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	case KVM_CAP_PPC_HTAB_FD:
-		r = kvmppc_ops->is_hv_enabled;
+		r = hv_enabled;
 		break;
 #endif
 	case KVM_CAP_NR_VCPUS:
@@ -390,7 +427,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		 * will have secondary threads "offline"), and for other KVM
 		 * implementations just count online CPUs.
 		 */
-		if (kvmppc_ops->is_hv_enabled)
+		if (hv_enabled)
 			r = num_present_cpus();
 		else
 			r = num_online_cpus();
@@ -1039,9 +1076,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 	case KVM_PPC_GET_SMMU_INFO: {
 		struct kvm_ppc_smmu_info info;
+		struct kvm *kvm = filp->private_data;
 
 		memset(&info, 0, sizeof(info));
-		r = kvmppc_ops->get_smmu_info(kvm, &info);
+		r = kvm->arch.kvm_ops->get_smmu_info(kvm, &info);
 		if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
 			r = -EFAULT;
 		break;
@@ -1052,9 +1090,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
 		break;
 	}
-	default:
-		r = kvmppc_ops->arch_vm_ioctl(filp, ioctl, arg);
-
+	default: {
+		struct kvm *kvm = filp->private_data;
+		r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
+	}
 #else /* CONFIG_PPC_BOOK3S_64 */
 	default:
 		r = -ENOTTY;
@@ -1104,15 +1143,10 @@ EXPORT_SYMBOL_GPL(kvmppc_init_lpid);
 
 int kvm_arch_init(void *opaque)
 {
-	if (kvmppc_ops) {
-		printk(KERN_ERR "kvm: already loaded the other module\n");
-		return -EEXIST;
-	}
-	kvmppc_ops = (struct kvmppc_ops *)opaque;
 	return 0;
 }
 
 void kvm_arch_exit(void)
 {
-	kvmppc_ops = NULL;
+
 }
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index e32e776f20c0..5b5341f78368 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -518,6 +518,10 @@ struct kvm_ppc_smmu_info {
 /* machine type bits, to be used as argument to KVM_CREATE_VM */
 #define KVM_VM_S390_UCONTROL	1
 
+/* on ppc, 0 indicates the default, 1 forces HV and 2 forces PR */
+#define KVM_VM_PPC_HV 1
+#define KVM_VM_PPC_PR 2
+
 #define KVM_S390_SIE_PAGE_OFFSET 1
 
 /*
-- 
cgit v1.2.3


From a78b55d1c0218b6d91d504941d20e36435c276f5 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 22:18:02 +0530
Subject: kvm: powerpc: book3s: drop is_hv_enabled

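Drop is_hv_enabled from struct kvmppc_ops, because it should not be a
property of the callback table. Callers now use is_kvmppc_hv_enabled(),
which compares kvm->arch.kvm_ops against kvmppc_hv_ops.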

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_ppc.h | 6 +++++-
 arch/powerpc/kvm/book3s.c          | 6 +++---
 arch/powerpc/kvm/book3s_hv.c       | 1 -
 arch/powerpc/kvm/book3s_pr.c       | 1 -
 arch/powerpc/kvm/book3s_xics.c     | 2 +-
 arch/powerpc/kvm/powerpc.c         | 2 +-
 6 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 3069cf4dcc88..c8317fbf92c4 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -183,7 +183,6 @@ union kvmppc_one_reg {
 
 struct kvmppc_ops {
 	struct module *owner;
-	bool is_hv_enabled;
 	int (*get_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 	int (*set_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 	int (*get_one_reg)(struct kvm_vcpu *vcpu, u64 id,
@@ -232,6 +231,11 @@ struct kvmppc_ops {
 extern struct kvmppc_ops *kvmppc_hv_ops;
 extern struct kvmppc_ops *kvmppc_pr_ops;
 
+static inline bool is_kvmppc_hv_enabled(struct kvm *kvm)
+{
+	return kvm->arch.kvm_ops == kvmppc_hv_ops;
+}
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index ad8f6ed3f136..8912608b7e1b 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -72,7 +72,7 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 
 static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->kvm->arch.kvm_ops->is_hv_enabled)
+	if (!is_kvmppc_hv_enabled(vcpu->kvm))
 		return to_book3s(vcpu)->hior;
 	return 0;
 }
@@ -80,7 +80,7 @@ static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
 static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
 			unsigned long pending_now, unsigned long old_pending)
 {
-	if (vcpu->kvm->arch.kvm_ops->is_hv_enabled)
+	if (is_kvmppc_hv_enabled(vcpu->kvm))
 		return;
 	if (pending_now)
 		vcpu->arch.shared->int_pending = 1;
@@ -94,7 +94,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
 	ulong crit_r1;
 	bool crit;
 
-	if (vcpu->kvm->arch.kvm_ops->is_hv_enabled)
+	if (is_kvmppc_hv_enabled(vcpu->kvm))
 		return false;
 
 	crit_raw = vcpu->arch.shared->critical;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8743048881b7..072287f1c3bc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2160,7 +2160,6 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
 }
 
 static struct kvmppc_ops kvm_ops_hv = {
-	.is_hv_enabled = true,
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
 	.get_one_reg = kvmppc_get_one_reg_hv,
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index fbd985f0cb02..df36cf2ed22b 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1526,7 +1526,6 @@ static long kvm_arch_vm_ioctl_pr(struct file *filp,
 }
 
 static struct kvmppc_ops kvm_ops_pr = {
-	.is_hv_enabled = false,
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
 	.get_one_reg = kvmppc_get_one_reg_pr,
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index f7a5108a3483..02a17dcf1610 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -818,7 +818,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
 	}
 
 	/* Check for real mode returning too hard */
-	if (xics->real_mode && vcpu->kvm->arch.kvm_ops->is_hv_enabled)
+	if (xics->real_mode && is_kvmppc_hv_enabled(vcpu->kvm))
 		return kvmppc_xics_rm_complete(vcpu, req);
 
 	switch (req) {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 0320c1721caa..9ae97686e9f4 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -200,7 +200,7 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
 		goto out;
 
 	/* HV KVM can only do PAPR mode for now */
-	if (!vcpu->arch.papr_enabled && vcpu->kvm->arch.kvm_ops->is_hv_enabled)
+	if (!vcpu->arch.papr_enabled && is_kvmppc_hv_enabled(vcpu->kvm))
 		goto out;
 
 #ifdef CONFIG_KVM_BOOKE_HV
-- 
cgit v1.2.3


From 51ae8d4a2b9e4aa9a502061b9c39168e08829b94 Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <r65777@freescale.com>
Date: Thu, 4 Jul 2013 11:45:46 +0530
Subject: powerpc: move debug registers in a structure

This way we can use the same struct data type with KVM, and it also
helps in using other debug-related functions.

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
Acked-by: Michael Neuling <mikey@neuling.org>
[scottwood@freescale.com: removed obvious debug_reg comment]
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/include/asm/processor.h |  34 ++++----
 arch/powerpc/include/asm/reg_booke.h |   8 +-
 arch/powerpc/kernel/asm-offsets.c    |   2 +-
 arch/powerpc/kernel/process.c        |  42 +++++-----
 arch/powerpc/kernel/ptrace.c         | 154 +++++++++++++++++------------------
 arch/powerpc/kernel/ptrace32.c       |   2 +-
 arch/powerpc/kernel/signal_32.c      |   6 +-
 arch/powerpc/kernel/traps.c          |  35 ++++----
 8 files changed, 144 insertions(+), 139 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index c1583070937d..7794b2b04eb2 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -167,21 +167,7 @@ struct thread_vr_state {
 	vector128	vscr __attribute__((aligned(16)));
 };
 
-struct thread_struct {
-	unsigned long	ksp;		/* Kernel stack pointer */
-#ifdef CONFIG_PPC64
-	unsigned long	ksp_vsid;
-#endif
-	struct pt_regs	*regs;		/* Pointer to saved register state */
-	mm_segment_t	fs;		/* for get_fs() validation */
-#ifdef CONFIG_BOOKE
-	/* BookE base exception scratch space; align on cacheline */
-	unsigned long	normsave[8] ____cacheline_aligned;
-#endif
-#ifdef CONFIG_PPC32
-	void		*pgdir;		/* root of page-table tree */
-	unsigned long	ksp_limit;	/* if ksp <= ksp_limit stack overflow */
-#endif
+struct debug_reg {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 	/*
 	 * The following help to manage the use of Debug Control Registers
@@ -218,6 +204,24 @@ struct thread_struct {
 	unsigned long	dvc2;
 #endif
 #endif
+};
+
+struct thread_struct {
+	unsigned long	ksp;		/* Kernel stack pointer */
+#ifdef CONFIG_PPC64
+	unsigned long	ksp_vsid;
+#endif
+	struct pt_regs	*regs;		/* Pointer to saved register state */
+	mm_segment_t	fs;		/* for get_fs() validation */
+#ifdef CONFIG_BOOKE
+	/* BookE base exception scratch space; align on cacheline */
+	unsigned long	normsave[8] ____cacheline_aligned;
+#endif
+#ifdef CONFIG_PPC32
+	void		*pgdir;		/* root of page-table tree */
+	unsigned long	ksp_limit;	/* if ksp <= ksp_limit stack overflow */
+#endif
+	struct debug_reg debug;
 	struct thread_fp_state	fp_state;
 	struct thread_fp_state	*fp_save_area;
 	int		fpexc_mode;	/* floating-point exception mode */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index ed8f836da094..2e31aacd8acc 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -381,7 +381,7 @@
 #define DBCR0_IA34T	0x00004000	/* Instr Addr 3-4 range Toggle */
 #define DBCR0_FT	0x00000001	/* Freeze Timers on debug event */
 
-#define dbcr_iac_range(task)	((task)->thread.dbcr0)
+#define dbcr_iac_range(task)	((task)->thread.debug.dbcr0)
 #define DBCR_IAC12I	DBCR0_IA12			/* Range Inclusive */
 #define DBCR_IAC12X	(DBCR0_IA12 | DBCR0_IA12X)	/* Range Exclusive */
 #define DBCR_IAC12MODE	(DBCR0_IA12 | DBCR0_IA12X)	/* IAC 1-2 Mode Bits */
@@ -395,7 +395,7 @@
 #define DBCR1_DAC1W	0x20000000	/* DAC1 Write Debug Event */
 #define DBCR1_DAC2W	0x10000000	/* DAC2 Write Debug Event */
 
-#define dbcr_dac(task)	((task)->thread.dbcr1)
+#define dbcr_dac(task)	((task)->thread.debug.dbcr1)
 #define DBCR_DAC1R	DBCR1_DAC1R
 #define DBCR_DAC1W	DBCR1_DAC1W
 #define DBCR_DAC2R	DBCR1_DAC2R
@@ -441,7 +441,7 @@
 #define DBCR0_CRET	0x00000020	/* Critical Return Debug Event */
 #define DBCR0_FT	0x00000001	/* Freeze Timers on debug event */
 
-#define dbcr_dac(task)	((task)->thread.dbcr0)
+#define dbcr_dac(task)	((task)->thread.debug.dbcr0)
 #define DBCR_DAC1R	DBCR0_DAC1R
 #define DBCR_DAC1W	DBCR0_DAC1W
 #define DBCR_DAC2R	DBCR0_DAC2R
@@ -475,7 +475,7 @@
 #define DBCR1_IAC34MX	0x000000C0	/* Instr Addr 3-4 range eXclusive */
 #define DBCR1_IAC34AT	0x00000001	/* Instr Addr 3-4 range Toggle */
 
-#define dbcr_iac_range(task)	((task)->thread.dbcr1)
+#define dbcr_iac_range(task)	((task)->thread.debug.dbcr1)
 #define DBCR_IAC12I	DBCR1_IAC12M	/* Range Inclusive */
 #define DBCR_IAC12X	DBCR1_IAC12MX	/* Range Exclusive */
 #define DBCR_IAC12MODE	DBCR1_IAC12MX	/* IAC 1-2 Mode Bits */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6278edddc3f8..e60a3697932c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -115,7 +115,7 @@ int main(void)
 #endif /* CONFIG_SPE */
 #endif /* CONFIG_PPC64 */
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-	DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0));
+	DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, debug.dbcr0));
 #endif
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
 	DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 83079ef159b9..3db9d7e39f39 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -314,28 +314,28 @@ static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk);
  */
 static void set_debug_reg_defaults(struct thread_struct *thread)
 {
-	thread->iac1 = thread->iac2 = 0;
+	thread->debug.iac1 = thread->debug.iac2 = 0;
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
-	thread->iac3 = thread->iac4 = 0;
+	thread->debug.iac3 = thread->debug.iac4 = 0;
 #endif
-	thread->dac1 = thread->dac2 = 0;
+	thread->debug.dac1 = thread->debug.dac2 = 0;
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
-	thread->dvc1 = thread->dvc2 = 0;
+	thread->debug.dvc1 = thread->debug.dvc2 = 0;
 #endif
-	thread->dbcr0 = 0;
+	thread->debug.dbcr0 = 0;
 #ifdef CONFIG_BOOKE
 	/*
 	 * Force User/Supervisor bits to b11 (user-only MSR[PR]=1)
 	 */
-	thread->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
+	thread->debug.dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
 			DBCR1_IAC3US | DBCR1_IAC4US;
 	/*
 	 * Force Data Address Compare User/Supervisor bits to be User-only
 	 * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0.
 	 */
-	thread->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+	thread->debug.dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
 #else
-	thread->dbcr1 = 0;
+	thread->debug.dbcr1 = 0;
 #endif
 }
 
@@ -348,22 +348,22 @@ static void prime_debug_regs(struct thread_struct *thread)
 	 */
 	mtmsr(mfmsr() & ~MSR_DE);
 
-	mtspr(SPRN_IAC1, thread->iac1);
-	mtspr(SPRN_IAC2, thread->iac2);
+	mtspr(SPRN_IAC1, thread->debug.iac1);
+	mtspr(SPRN_IAC2, thread->debug.iac2);
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
-	mtspr(SPRN_IAC3, thread->iac3);
-	mtspr(SPRN_IAC4, thread->iac4);
+	mtspr(SPRN_IAC3, thread->debug.iac3);
+	mtspr(SPRN_IAC4, thread->debug.iac4);
 #endif
-	mtspr(SPRN_DAC1, thread->dac1);
-	mtspr(SPRN_DAC2, thread->dac2);
+	mtspr(SPRN_DAC1, thread->debug.dac1);
+	mtspr(SPRN_DAC2, thread->debug.dac2);
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
-	mtspr(SPRN_DVC1, thread->dvc1);
-	mtspr(SPRN_DVC2, thread->dvc2);
+	mtspr(SPRN_DVC1, thread->debug.dvc1);
+	mtspr(SPRN_DVC2, thread->debug.dvc2);
 #endif
-	mtspr(SPRN_DBCR0, thread->dbcr0);
-	mtspr(SPRN_DBCR1, thread->dbcr1);
+	mtspr(SPRN_DBCR0, thread->debug.dbcr0);
+	mtspr(SPRN_DBCR1, thread->debug.dbcr1);
 #ifdef CONFIG_BOOKE
-	mtspr(SPRN_DBCR2, thread->dbcr2);
+	mtspr(SPRN_DBCR2, thread->debug.dbcr2);
 #endif
 }
 /*
@@ -373,8 +373,8 @@ static void prime_debug_regs(struct thread_struct *thread)
  */
 static void switch_booke_debug_regs(struct thread_struct *new_thread)
 {
-	if ((current->thread.dbcr0 & DBCR0_IDM)
-		|| (new_thread->dbcr0 & DBCR0_IDM))
+	if ((current->thread.debug.dbcr0 & DBCR0_IDM)
+		|| (new_thread->debug.dbcr0 & DBCR0_IDM))
 			prime_debug_regs(new_thread);
 }
 #else	/* !CONFIG_PPC_ADV_DEBUG_REGS */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 1ca589c9ec6d..aedfd41d354c 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -855,8 +855,8 @@ void user_enable_single_step(struct task_struct *task)
 
 	if (regs != NULL) {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-		task->thread.dbcr0 &= ~DBCR0_BT;
-		task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+		task->thread.debug.dbcr0 &= ~DBCR0_BT;
+		task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
 		regs->msr |= MSR_DE;
 #else
 		regs->msr &= ~MSR_BE;
@@ -872,8 +872,8 @@ void user_enable_block_step(struct task_struct *task)
 
 	if (regs != NULL) {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-		task->thread.dbcr0 &= ~DBCR0_IC;
-		task->thread.dbcr0 = DBCR0_IDM | DBCR0_BT;
+		task->thread.debug.dbcr0 &= ~DBCR0_IC;
+		task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT;
 		regs->msr |= MSR_DE;
 #else
 		regs->msr &= ~MSR_SE;
@@ -895,16 +895,16 @@ void user_disable_single_step(struct task_struct *task)
 		 * And, after doing so, if all debug flags are off, turn
 		 * off DBCR0(IDM) and MSR(DE) .... Torez
 		 */
-		task->thread.dbcr0 &= ~DBCR0_IC;
+		task->thread.debug.dbcr0 &= ~DBCR0_IC;
 		/*
 		 * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set.
 		 */
-		if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0,
-					task->thread.dbcr1)) {
+		if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+					task->thread.debug.dbcr1)) {
 			/*
 			 * All debug events were off.....
 			 */
-			task->thread.dbcr0 &= ~DBCR0_IDM;
+			task->thread.debug.dbcr0 &= ~DBCR0_IDM;
 			regs->msr &= ~MSR_DE;
 		}
 #else
@@ -1023,14 +1023,14 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 	 */
 
 	/* DAC's hold the whole address without any mode flags */
-	task->thread.dac1 = data & ~0x3UL;
+	task->thread.debug.dac1 = data & ~0x3UL;
 
-	if (task->thread.dac1 == 0) {
+	if (task->thread.debug.dac1 == 0) {
 		dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
-		if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0,
-					task->thread.dbcr1)) {
+		if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+					task->thread.debug.dbcr1)) {
 			task->thread.regs->msr &= ~MSR_DE;
-			task->thread.dbcr0 &= ~DBCR0_IDM;
+			task->thread.debug.dbcr0 &= ~DBCR0_IDM;
 		}
 		return 0;
 	}
@@ -1042,7 +1042,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 
 	/* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
 	   register */
-	task->thread.dbcr0 |= DBCR0_IDM;
+	task->thread.debug.dbcr0 |= DBCR0_IDM;
 
 	/* Check for write and read flags and set DBCR0
 	   accordingly */
@@ -1072,10 +1072,10 @@ static long set_instruction_bp(struct task_struct *child,
 			      struct ppc_hw_breakpoint *bp_info)
 {
 	int slot;
-	int slot1_in_use = ((child->thread.dbcr0 & DBCR0_IAC1) != 0);
-	int slot2_in_use = ((child->thread.dbcr0 & DBCR0_IAC2) != 0);
-	int slot3_in_use = ((child->thread.dbcr0 & DBCR0_IAC3) != 0);
-	int slot4_in_use = ((child->thread.dbcr0 & DBCR0_IAC4) != 0);
+	int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0);
+	int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0);
+	int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0);
+	int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0);
 
 	if (dbcr_iac_range(child) & DBCR_IAC12MODE)
 		slot2_in_use = 1;
@@ -1094,9 +1094,9 @@ static long set_instruction_bp(struct task_struct *child,
 		/* We need a pair of IAC regsisters */
 		if ((!slot1_in_use) && (!slot2_in_use)) {
 			slot = 1;
-			child->thread.iac1 = bp_info->addr;
-			child->thread.iac2 = bp_info->addr2;
-			child->thread.dbcr0 |= DBCR0_IAC1;
+			child->thread.debug.iac1 = bp_info->addr;
+			child->thread.debug.iac2 = bp_info->addr2;
+			child->thread.debug.dbcr0 |= DBCR0_IAC1;
 			if (bp_info->addr_mode ==
 					PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
 				dbcr_iac_range(child) |= DBCR_IAC12X;
@@ -1105,9 +1105,9 @@ static long set_instruction_bp(struct task_struct *child,
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
 		} else if ((!slot3_in_use) && (!slot4_in_use)) {
 			slot = 3;
-			child->thread.iac3 = bp_info->addr;
-			child->thread.iac4 = bp_info->addr2;
-			child->thread.dbcr0 |= DBCR0_IAC3;
+			child->thread.debug.iac3 = bp_info->addr;
+			child->thread.debug.iac4 = bp_info->addr2;
+			child->thread.debug.dbcr0 |= DBCR0_IAC3;
 			if (bp_info->addr_mode ==
 					PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
 				dbcr_iac_range(child) |= DBCR_IAC34X;
@@ -1127,30 +1127,30 @@ static long set_instruction_bp(struct task_struct *child,
 			 */
 			if (slot2_in_use || (slot3_in_use == slot4_in_use)) {
 				slot = 1;
-				child->thread.iac1 = bp_info->addr;
-				child->thread.dbcr0 |= DBCR0_IAC1;
+				child->thread.debug.iac1 = bp_info->addr;
+				child->thread.debug.dbcr0 |= DBCR0_IAC1;
 				goto out;
 			}
 		}
 		if (!slot2_in_use) {
 			slot = 2;
-			child->thread.iac2 = bp_info->addr;
-			child->thread.dbcr0 |= DBCR0_IAC2;
+			child->thread.debug.iac2 = bp_info->addr;
+			child->thread.debug.dbcr0 |= DBCR0_IAC2;
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
 		} else if (!slot3_in_use) {
 			slot = 3;
-			child->thread.iac3 = bp_info->addr;
-			child->thread.dbcr0 |= DBCR0_IAC3;
+			child->thread.debug.iac3 = bp_info->addr;
+			child->thread.debug.dbcr0 |= DBCR0_IAC3;
 		} else if (!slot4_in_use) {
 			slot = 4;
-			child->thread.iac4 = bp_info->addr;
-			child->thread.dbcr0 |= DBCR0_IAC4;
+			child->thread.debug.iac4 = bp_info->addr;
+			child->thread.debug.dbcr0 |= DBCR0_IAC4;
 #endif
 		} else
 			return -ENOSPC;
 	}
 out:
-	child->thread.dbcr0 |= DBCR0_IDM;
+	child->thread.debug.dbcr0 |= DBCR0_IDM;
 	child->thread.regs->msr |= MSR_DE;
 
 	return slot;
@@ -1160,49 +1160,49 @@ static int del_instruction_bp(struct task_struct *child, int slot)
 {
 	switch (slot) {
 	case 1:
-		if ((child->thread.dbcr0 & DBCR0_IAC1) == 0)
+		if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0)
 			return -ENOENT;
 
 		if (dbcr_iac_range(child) & DBCR_IAC12MODE) {
 			/* address range - clear slots 1 & 2 */
-			child->thread.iac2 = 0;
+			child->thread.debug.iac2 = 0;
 			dbcr_iac_range(child) &= ~DBCR_IAC12MODE;
 		}
-		child->thread.iac1 = 0;
-		child->thread.dbcr0 &= ~DBCR0_IAC1;
+		child->thread.debug.iac1 = 0;
+		child->thread.debug.dbcr0 &= ~DBCR0_IAC1;
 		break;
 	case 2:
-		if ((child->thread.dbcr0 & DBCR0_IAC2) == 0)
+		if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0)
 			return -ENOENT;
 
 		if (dbcr_iac_range(child) & DBCR_IAC12MODE)
 			/* used in a range */
 			return -EINVAL;
-		child->thread.iac2 = 0;
-		child->thread.dbcr0 &= ~DBCR0_IAC2;
+		child->thread.debug.iac2 = 0;
+		child->thread.debug.dbcr0 &= ~DBCR0_IAC2;
 		break;
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
 	case 3:
-		if ((child->thread.dbcr0 & DBCR0_IAC3) == 0)
+		if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0)
 			return -ENOENT;
 
 		if (dbcr_iac_range(child) & DBCR_IAC34MODE) {
 			/* address range - clear slots 3 & 4 */
-			child->thread.iac4 = 0;
+			child->thread.debug.iac4 = 0;
 			dbcr_iac_range(child) &= ~DBCR_IAC34MODE;
 		}
-		child->thread.iac3 = 0;
-		child->thread.dbcr0 &= ~DBCR0_IAC3;
+		child->thread.debug.iac3 = 0;
+		child->thread.debug.dbcr0 &= ~DBCR0_IAC3;
 		break;
 	case 4:
-		if ((child->thread.dbcr0 & DBCR0_IAC4) == 0)
+		if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0)
 			return -ENOENT;
 
 		if (dbcr_iac_range(child) & DBCR_IAC34MODE)
 			/* Used in a range */
 			return -EINVAL;
-		child->thread.iac4 = 0;
-		child->thread.dbcr0 &= ~DBCR0_IAC4;
+		child->thread.debug.iac4 = 0;
+		child->thread.debug.dbcr0 &= ~DBCR0_IAC4;
 		break;
 #endif
 	default:
@@ -1232,18 +1232,18 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
 			dbcr_dac(child) |= DBCR_DAC1R;
 		if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
 			dbcr_dac(child) |= DBCR_DAC1W;
-		child->thread.dac1 = (unsigned long)bp_info->addr;
+		child->thread.debug.dac1 = (unsigned long)bp_info->addr;
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
 		if (byte_enable) {
-			child->thread.dvc1 =
+			child->thread.debug.dvc1 =
 				(unsigned long)bp_info->condition_value;
-			child->thread.dbcr2 |=
+			child->thread.debug.dbcr2 |=
 				((byte_enable << DBCR2_DVC1BE_SHIFT) |
 				 (condition_mode << DBCR2_DVC1M_SHIFT));
 		}
 #endif
 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-	} else if (child->thread.dbcr2 & DBCR2_DAC12MODE) {
+	} else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
 		/* Both dac1 and dac2 are part of a range */
 		return -ENOSPC;
 #endif
@@ -1253,19 +1253,19 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
 			dbcr_dac(child) |= DBCR_DAC2R;
 		if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
 			dbcr_dac(child) |= DBCR_DAC2W;
-		child->thread.dac2 = (unsigned long)bp_info->addr;
+		child->thread.debug.dac2 = (unsigned long)bp_info->addr;
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
 		if (byte_enable) {
-			child->thread.dvc2 =
+			child->thread.debug.dvc2 =
 				(unsigned long)bp_info->condition_value;
-			child->thread.dbcr2 |=
+			child->thread.debug.dbcr2 |=
 				((byte_enable << DBCR2_DVC2BE_SHIFT) |
 				 (condition_mode << DBCR2_DVC2M_SHIFT));
 		}
 #endif
 	} else
 		return -ENOSPC;
-	child->thread.dbcr0 |= DBCR0_IDM;
+	child->thread.debug.dbcr0 |= DBCR0_IDM;
 	child->thread.regs->msr |= MSR_DE;
 
 	return slot + 4;
@@ -1277,32 +1277,32 @@ static int del_dac(struct task_struct *child, int slot)
 		if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0)
 			return -ENOENT;
 
-		child->thread.dac1 = 0;
+		child->thread.debug.dac1 = 0;
 		dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W);
 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-		if (child->thread.dbcr2 & DBCR2_DAC12MODE) {
-			child->thread.dac2 = 0;
-			child->thread.dbcr2 &= ~DBCR2_DAC12MODE;
+		if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
+			child->thread.debug.dac2 = 0;
+			child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
 		}
-		child->thread.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
+		child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
 #endif
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
-		child->thread.dvc1 = 0;
+		child->thread.debug.dvc1 = 0;
 #endif
 	} else if (slot == 2) {
 		if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0)
 			return -ENOENT;
 
 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-		if (child->thread.dbcr2 & DBCR2_DAC12MODE)
+		if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE)
 			/* Part of a range */
 			return -EINVAL;
-		child->thread.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
+		child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
 #endif
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
-		child->thread.dvc2 = 0;
+		child->thread.debug.dvc2 = 0;
 #endif
-		child->thread.dac2 = 0;
+		child->thread.debug.dac2 = 0;
 		dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W);
 	} else
 		return -EINVAL;
@@ -1344,22 +1344,22 @@ static int set_dac_range(struct task_struct *child,
 			return -EIO;
 	}
 
-	if (child->thread.dbcr0 &
+	if (child->thread.debug.dbcr0 &
 	    (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W))
 		return -ENOSPC;
 
 	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
-		child->thread.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
+		child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
 	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
-		child->thread.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
-	child->thread.dac1 = bp_info->addr;
-	child->thread.dac2 = bp_info->addr2;
+		child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
+	child->thread.debug.dac1 = bp_info->addr;
+	child->thread.debug.dac2 = bp_info->addr2;
 	if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
-		child->thread.dbcr2  |= DBCR2_DAC12M;
+		child->thread.debug.dbcr2  |= DBCR2_DAC12M;
 	else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
-		child->thread.dbcr2  |= DBCR2_DAC12MX;
+		child->thread.debug.dbcr2  |= DBCR2_DAC12MX;
 	else	/* PPC_BREAKPOINT_MODE_MASK */
-		child->thread.dbcr2  |= DBCR2_DAC12MM;
+		child->thread.debug.dbcr2  |= DBCR2_DAC12MM;
 	child->thread.regs->msr |= MSR_DE;
 
 	return 5;
@@ -1490,9 +1490,9 @@ static long ppc_del_hwdebug(struct task_struct *child, long data)
 		rc = del_dac(child, (int)data - 4);
 
 	if (!rc) {
-		if (!DBCR_ACTIVE_EVENTS(child->thread.dbcr0,
-					child->thread.dbcr1)) {
-			child->thread.dbcr0 &= ~DBCR0_IDM;
+		if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0,
+					child->thread.debug.dbcr1)) {
+			child->thread.debug.dbcr0 &= ~DBCR0_IDM;
 			child->thread.regs->msr &= ~MSR_DE;
 		}
 	}
@@ -1670,7 +1670,7 @@ long arch_ptrace(struct task_struct *child, long request,
 		if (addr > 0)
 			break;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-		ret = put_user(child->thread.dac1, datalp);
+		ret = put_user(child->thread.debug.dac1, datalp);
 #else
 		dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |
 			     (child->thread.hw_brk.type & HW_BRK_TYPE_DABR));
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
index 097f8dc426a0..f52b7db327c8 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -266,7 +266,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 		if (addr > 0)
 			break;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-		ret = put_user(child->thread.dac1, (u32 __user *)data);
+		ret = put_user(child->thread.debug.dac1, (u32 __user *)data);
 #else
 		dabr_fake = (
 			(child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index c094e28b3f10..1a410aa57fb7 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1312,7 +1312,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
 	unsigned char tmp;
 	unsigned long new_msr = regs->msr;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-	unsigned long new_dbcr0 = current->thread.dbcr0;
+	unsigned long new_dbcr0 = current->thread.debug.dbcr0;
 #endif
 
 	for (i=0; i<ndbg; i++) {
@@ -1327,7 +1327,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
 			} else {
 				new_dbcr0 &= ~DBCR0_IC;
 				if (!DBCR_ACTIVE_EVENTS(new_dbcr0,
-						current->thread.dbcr1)) {
+						current->thread.debug.dbcr1)) {
 					new_msr &= ~MSR_DE;
 					new_dbcr0 &= ~DBCR0_IDM;
 				}
@@ -1362,7 +1362,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
 	   the user is really doing something wrong. */
 	regs->msr = new_msr;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-	current->thread.dbcr0 = new_dbcr0;
+	current->thread.debug.dbcr0 = new_dbcr0;
 #endif
 
 	if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx))
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 36a1f95a2a36..f686686ca0aa 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -351,8 +351,8 @@ static inline int check_io_access(struct pt_regs *regs)
 #define REASON_TRAP		ESR_PTR
 
 /* single-step stuff */
-#define single_stepping(regs)	(current->thread.dbcr0 & DBCR0_IC)
-#define clear_single_step(regs)	(current->thread.dbcr0 &= ~DBCR0_IC)
+#define single_stepping(regs)	(current->thread.debug.dbcr0 & DBCR0_IC)
+#define clear_single_step(regs)	(current->thread.debug.dbcr0 &= ~DBCR0_IC)
 
 #else
 /* On non-4xx, the reason for the machine check or program
@@ -1489,7 +1489,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
 	if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
 		dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W);
 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-		current->thread.dbcr2 &= ~DBCR2_DAC12MODE;
+		current->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
 #endif
 		do_send_trap(regs, mfspr(SPRN_DAC1), debug_status, TRAP_HWBKPT,
 			     5);
@@ -1500,24 +1500,24 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
 			     6);
 		changed |= 0x01;
 	}  else if (debug_status & DBSR_IAC1) {
-		current->thread.dbcr0 &= ~DBCR0_IAC1;
+		current->thread.debug.dbcr0 &= ~DBCR0_IAC1;
 		dbcr_iac_range(current) &= ~DBCR_IAC12MODE;
 		do_send_trap(regs, mfspr(SPRN_IAC1), debug_status, TRAP_HWBKPT,
 			     1);
 		changed |= 0x01;
 	}  else if (debug_status & DBSR_IAC2) {
-		current->thread.dbcr0 &= ~DBCR0_IAC2;
+		current->thread.debug.dbcr0 &= ~DBCR0_IAC2;
 		do_send_trap(regs, mfspr(SPRN_IAC2), debug_status, TRAP_HWBKPT,
 			     2);
 		changed |= 0x01;
 	}  else if (debug_status & DBSR_IAC3) {
-		current->thread.dbcr0 &= ~DBCR0_IAC3;
+		current->thread.debug.dbcr0 &= ~DBCR0_IAC3;
 		dbcr_iac_range(current) &= ~DBCR_IAC34MODE;
 		do_send_trap(regs, mfspr(SPRN_IAC3), debug_status, TRAP_HWBKPT,
 			     3);
 		changed |= 0x01;
 	}  else if (debug_status & DBSR_IAC4) {
-		current->thread.dbcr0 &= ~DBCR0_IAC4;
+		current->thread.debug.dbcr0 &= ~DBCR0_IAC4;
 		do_send_trap(regs, mfspr(SPRN_IAC4), debug_status, TRAP_HWBKPT,
 			     4);
 		changed |= 0x01;
@@ -1527,19 +1527,20 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
 	 * Check all other debug flags and see if that bit needs to be turned
 	 * back on or not.
 	 */
-	if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, current->thread.dbcr1))
+	if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
+	    current->thread.debug.dbcr1))
 		regs->msr |= MSR_DE;
 	else
 		/* Make sure the IDM flag is off */
-		current->thread.dbcr0 &= ~DBCR0_IDM;
+		current->thread.debug.dbcr0 &= ~DBCR0_IDM;
 
 	if (changed & 0x01)
-		mtspr(SPRN_DBCR0, current->thread.dbcr0);
+		mtspr(SPRN_DBCR0, current->thread.debug.dbcr0);
 }
 
 void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
 {
-	current->thread.dbsr = debug_status;
+	current->thread.debug.dbsr = debug_status;
 
 	/* Hack alert: On BookE, Branch Taken stops on the branch itself, while
 	 * on server, it stops on the target of the branch. In order to simulate
@@ -1556,8 +1557,8 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
 
 		/* Do the single step trick only when coming from userspace */
 		if (user_mode(regs)) {
-			current->thread.dbcr0 &= ~DBCR0_BT;
-			current->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+			current->thread.debug.dbcr0 &= ~DBCR0_BT;
+			current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
 			regs->msr |= MSR_DE;
 			return;
 		}
@@ -1585,13 +1586,13 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
 			return;
 
 		if (user_mode(regs)) {
-			current->thread.dbcr0 &= ~DBCR0_IC;
-			if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0,
-					       current->thread.dbcr1))
+			current->thread.debug.dbcr0 &= ~DBCR0_IC;
+			if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
+					       current->thread.debug.dbcr1))
 				regs->msr |= MSR_DE;
 			else
 				/* Make sure the IDM bit is off */
-				current->thread.dbcr0 &= ~DBCR0_IDM;
+				current->thread.debug.dbcr0 &= ~DBCR0_IDM;
 		}
 
 		_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
-- 
cgit v1.2.3


From 3743c9b8ceb638b6e4b78b42f2262e22aa6359f0 Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <r65777@freescale.com>
Date: Thu, 4 Jul 2013 12:27:44 +0530
Subject: powerpc: export debug registers save function for KVM

KVM needs this function when switching from a vcpu to a user-space
thread. A subsequent patch will use this function.

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
Acked-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/include/asm/switch_to.h | 1 +
 arch/powerpc/kernel/process.c        | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 2be5618cdec6..9ee12610af02 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -35,6 +35,7 @@ extern void giveup_vsx(struct task_struct *);
 extern void enable_kernel_spe(void);
 extern void giveup_spe(struct task_struct *);
 extern void load_up_spe(struct task_struct *);
+extern void switch_booke_debug_regs(struct thread_struct *new_thread);
 
 #ifndef CONFIG_SMP
 extern void discard_lazy_cpu_state(void);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 3db9d7e39f39..4d42c4de8b9b 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -371,12 +371,13 @@ static void prime_debug_regs(struct thread_struct *thread)
  * debug registers, set the debug registers from the values
  * stored in the new thread.
  */
-static void switch_booke_debug_regs(struct thread_struct *new_thread)
+void switch_booke_debug_regs(struct thread_struct *new_thread)
 {
 	if ((current->thread.debug.dbcr0 & DBCR0_IDM)
 		|| (new_thread->debug.dbcr0 & DBCR0_IDM))
 			prime_debug_regs(new_thread);
 }
+EXPORT_SYMBOL_GPL(switch_booke_debug_regs);
 #else	/* !CONFIG_PPC_ADV_DEBUG_REGS */
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
 static void set_debug_reg_defaults(struct thread_struct *thread)
-- 
cgit v1.2.3


From a3821b2af185b64e3382c45fbdaa2cbc91ce14b8 Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Mon, 28 Oct 2013 22:07:59 -0500
Subject: powerpc: Fix PPC_EMULATED_STATS build break with sync patch

Commit 9863c28a2af90a56c088f5f6288d7f6d2c923c14 ("powerpc: Emulate sync
instruction variants") introduced a build breakage with
CONFIG_PPC_EMULATED_STATS enabled.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Cc: Kumar Gala <galak@kernel.org>
Cc: James Yang <James.Yang@freescale.com>
---
---
 arch/powerpc/include/asm/emulated_ops.h | 1 +
 arch/powerpc/kernel/traps.c             | 1 +
 2 files changed, 2 insertions(+)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
index 5a8b82aa7241..4358e3002f35 100644
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -43,6 +43,7 @@ extern struct ppc_emulated {
 	struct ppc_emulated_entry popcntb;
 	struct ppc_emulated_entry spe;
 	struct ppc_emulated_entry string;
+	struct ppc_emulated_entry sync;
 	struct ppc_emulated_entry unaligned;
 #ifdef CONFIG_MATH_EMULATION
 	struct ppc_emulated_entry math;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index ad20dcfaedd0..62c3dd8c69f2 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1820,6 +1820,7 @@ struct ppc_emulated ppc_emulated = {
 	WARN_EMULATED_SETUP(popcntb),
 	WARN_EMULATED_SETUP(spe),
 	WARN_EMULATED_SETUP(string),
+	WARN_EMULATED_SETUP(sync),
 	WARN_EMULATED_SETUP(unaligned),
 #ifdef CONFIG_MATH_EMULATION
 	WARN_EMULATED_SETUP(math),
-- 
cgit v1.2.3


From b88c4767d9e2290aaa22b8b3702ad72af0ebd113 Mon Sep 17 00:00:00 2001
From: Robert Jennings <rcj@linux.vnet.ibm.com>
Date: Mon, 28 Oct 2013 09:20:51 -0500
Subject: powerpc: Move local setup.h declarations to arch includes

Move the few declarations from arch/powerpc/kernel/setup.h
into arch/powerpc/include/asm/setup.h.  This resolves a
sparse warning for arch/powerpc/mm/numa.c which defines
do_init_bootmem() but can't include the setup.h header
in the prior path.

Resolves:
arch/powerpc/mm/numa.c:998:13:
        warning: symbol 'do_init_bootmem' was not declared.
                 Should it be static?

Signed-off-by: Robert C Jennings <rcj@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/setup.h   | 4 ++++
 arch/powerpc/kernel/module.c       | 3 +--
 arch/powerpc/kernel/module_32.c    | 3 +--
 arch/powerpc/kernel/module_64.c    | 3 +--
 arch/powerpc/kernel/setup-common.c | 2 --
 arch/powerpc/kernel/setup.h        | 9 ---------
 arch/powerpc/kernel/setup_32.c     | 2 --
 arch/powerpc/kernel/setup_64.c     | 2 --
 arch/powerpc/kernel/vdso.c         | 3 +--
 9 files changed, 8 insertions(+), 23 deletions(-)
 delete mode 100644 arch/powerpc/kernel/setup.h

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index d3ca85529b8b..703a8412dac2 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -23,6 +23,10 @@ extern void reloc_got2(unsigned long);
 
 #define PTRRELOC(x)	((typeof(x)) add_reloc_offset((unsigned long)(x)))
 
+void check_for_initrd(void);
+void do_init_bootmem(void);
+void setup_panic(void);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif	/* _ASM_POWERPC_SETUP_H */
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index 2d275707f419..9547381b631a 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -25,8 +25,7 @@
 #include <asm/uaccess.h>
 #include <asm/firmware.h>
 #include <linux/sort.h>
-
-#include "setup.h"
+#include <asm/setup.h>
 
 LIST_HEAD(module_bug_list);
 
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index 2e3200ca485f..6cff040bf456 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -26,8 +26,7 @@
 #include <linux/cache.h>
 #include <linux/bug.h>
 #include <linux/sort.h>
-
-#include "setup.h"
+#include <asm/setup.h>
 
 #if 0
 #define DEBUGP printk
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index a102f4412392..12664c130d73 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -26,8 +26,7 @@
 #include <asm/firmware.h>
 #include <asm/code-patching.h>
 #include <linux/sort.h>
-
-#include "setup.h"
+#include <asm/setup.h>
 
 /* FIXME: We don't do .init separately.  To do this, we'd need to have
    a separate r2 value in the init and core section, and stub between
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 3d261c071fc8..febc80445d25 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -62,8 +62,6 @@
 #include <mm/mmu_decl.h>
 #include <asm/fadump.h>
 
-#include "setup.h"
-
 #ifdef DEBUG
 #include <asm/udbg.h>
 #define DBG(fmt...) udbg_printf(fmt)
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
deleted file mode 100644
index 4c67ad7fae08..000000000000
--- a/arch/powerpc/kernel/setup.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _POWERPC_KERNEL_SETUP_H
-#define _POWERPC_KERNEL_SETUP_H
-
-void check_for_initrd(void);
-void do_init_bootmem(void);
-void setup_panic(void);
-extern int do_early_xmon;
-
-#endif /* _POWERPC_KERNEL_SETUP_H */
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index a4bbcae72578..b903dc5cf944 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -40,8 +40,6 @@
 #include <asm/mmu_context.h>
 #include <asm/epapr_hcalls.h>
 
-#include "setup.h"
-
 #define DBG(fmt...)
 
 extern void bootx_init(unsigned long r4, unsigned long phys);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 278ca93e1f28..4085aaa9478f 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -68,8 +68,6 @@
 #include <asm/hugetlb.h>
 #include <asm/epapr_hcalls.h>
 
-#include "setup.h"
-
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
 #else
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 1d9c92621b36..094e45c16a17 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -34,8 +34,7 @@
 #include <asm/firmware.h>
 #include <asm/vdso.h>
 #include <asm/vdso_datapage.h>
-
-#include "setup.h"
+#include <asm/setup.h>
 
 #undef DEBUG
 
-- 
cgit v1.2.3


From b83941798c35f9cffba36927011df2b53c3884d8 Mon Sep 17 00:00:00 2001
From: Vaishnavi Bhat <vaishnavi@linux.vnet.ibm.com>
Date: Sun, 27 Oct 2013 11:47:19 +0530
Subject: powerpc: Fix a typo in comments of va to pa conversion

This patch fixes a typo in the comment describing virtual to physical
address conversion.

Signed-off-by: Vaishnavi Bhat <vaishnavi@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/page.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index b9f426212d3a..753c66206a15 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -78,7 +78,7 @@ extern unsigned int HPAGE_SHIFT;
  *
  * Also, KERNELBASE >= PAGE_OFFSET and PHYSICAL_START >= MEMORY_START
  *
- * There are two was to determine a physical address from a virtual one:
+ * There are two ways to determine a physical address from a virtual one:
  * va = pa + PAGE_OFFSET - MEMORY_START
  * va = pa + KERNELBASE - PHYSICAL_START
  *
-- 
cgit v1.2.3


From ecb35c3943040f4db735c7d14c24ee7750cbb482 Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Thu, 17 Oct 2013 17:08:28 +1100
Subject: powerpc: Fix 64K page size support for PPC44x

PPC44x supports page sizes other than 4K; however, when a 64K page size
is selected, compilation fails. This is due to a change in the
definition of pgtable_t introduced by the following patch:

commit 5c1f6ee9a31cbdac90bbb8ae1ba4475031ac74b4
Author: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
powerpc: Reduce PTE table memory wastage

The above patch only implements the new layout for PPC64, so it doesn't
compile for PPC32 with a 64K page size. Ideally we should implement
the same layout for PPC32; however, in the meantime this patch reverts
the definition of pgtable_t for PPC32.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/page.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 753c66206a15..32e4e212b9c1 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -403,7 +403,7 @@ void arch_free_page(struct page *page, int order);
 
 struct vm_area_struct;
 
-#ifdef CONFIG_PPC_64K_PAGES
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC64)
 typedef pte_t *pgtable_t;
 #else
 typedef struct page *pgtable_t;
-- 
cgit v1.2.3


From 733187e29576041ceccf3b82092ca900fc929170 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 20 Oct 2013 10:26:20 +1100
Subject: powerpc/pseries: Fix dedicated processor partition detection

commit f13c13a00512 (powerpc: Stop using non-architected shared_proc
field in lppaca) fixed a potential issue with shared/dedicated
partition detection. The old method of detection relied on an
unarchitected field (shared_proc), and this patch switched
to using something architected (a non zero yield_count).

Unfortunately the assertion in the Linux header that yield_count
is only non zero on shared processor partitions is not true. It
turns out dedicated processor partitions can increment yield_count
and as such we falsely detect dedicated partitions as shared.

Fix the comment, and switch back to using the old method.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/lppaca.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 4470d1e34d23..844c28de7ec0 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -84,8 +84,8 @@ struct lppaca {
 	 * the processor is yielded (either because of an OS yield or a
 	 * hypervisor preempt).  An even value implies that the processor is
 	 * currently executing.
-	 * NOTE: This value will ALWAYS be zero for dedicated processors and
-	 * will NEVER be zero for shared processors (ie, initialized to a 1).
+	 * NOTE: Even dedicated processor partitions can yield so this
+	 * field cannot be used to determine if we are shared or dedicated.
 	 */
 	volatile __be32 yield_count;
 	volatile __be32 dispersion_count; /* dispatch changed physical cpu */
@@ -106,15 +106,15 @@ extern struct lppaca lppaca[];
 #define lppaca_of(cpu)	(*paca[cpu].lppaca_ptr)
 
 /*
- * Old kernels used a reserved bit in the VPA to determine if it was running
- * in shared processor mode. New kernels look for a non zero yield count
- * but KVM still needs to set the bit to keep the old stuff happy.
+ * We are using a non architected field to determine if a partition is
+ * shared or dedicated. This currently works on both KVM and PHYP, but
+ * we will have to transition to something better.
  */
 #define LPPACA_OLD_SHARED_PROC		2
 
 static inline bool lppaca_shared_proc(struct lppaca *l)
 {
-	return l->yield_count != 0;
+	return !!(l->__old_status & LPPACA_OLD_SHARED_PROC);
 }
 
 /*
-- 
cgit v1.2.3


From ef1313deafb7baa6d3382044e962d5ad5e8c8dd6 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 14 Oct 2013 21:03:58 +1100
Subject: powerpc: Add VMX optimised xor for RAID5

Add a VMX optimised xor, used primarily for RAID5. On a POWER7 blade
this is a decent win:

   32regs    : 17932.800 MB/sec
   altivec   : 19724.800 MB/sec

The bigger gain is when the same test is run in SMT4 mode, as it
would be if there were a lot of work going on:

   8regs     :  8377.600 MB/sec
   altivec   : 15801.600 MB/sec

I tested this against an array created without the patch, and also
verified it worked as expected on a little endian kernel.

[ Fix !CONFIG_ALTIVEC build -- BenH ]

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/xor.h |  67 ++++++++++++++++
 arch/powerpc/lib/Makefile      |   3 +
 arch/powerpc/lib/xor_vmx.c     | 177 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 247 insertions(+)
 create mode 100644 arch/powerpc/lib/xor_vmx.c

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/xor.h b/arch/powerpc/include/asm/xor.h
index c82eb12a5b18..0abb97f3be10 100644
--- a/arch/powerpc/include/asm/xor.h
+++ b/arch/powerpc/include/asm/xor.h
@@ -1 +1,68 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#ifndef _ASM_POWERPC_XOR_H
+#define _ASM_POWERPC_XOR_H
+
+#ifdef CONFIG_ALTIVEC
+
+#include <asm/cputable.h>
+
+void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in);
+void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in, unsigned long *v3_in);
+void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in, unsigned long *v3_in,
+		   unsigned long *v4_in);
+void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in, unsigned long *v3_in,
+		   unsigned long *v4_in, unsigned long *v5_in);
+
+static struct xor_block_template xor_block_altivec = {
+	.name = "altivec",
+	.do_2 = xor_altivec_2,
+	.do_3 = xor_altivec_3,
+	.do_4 = xor_altivec_4,
+	.do_5 = xor_altivec_5,
+};
+
+#define XOR_SPEED_ALTIVEC()				\
+	do {						\
+		if (cpu_has_feature(CPU_FTR_ALTIVEC))	\
+			xor_speed(&xor_block_altivec);	\
+	} while (0)
+#else
+#define XOR_SPEED_ALTIVEC()
+#endif
+
+/* Also try the generic routines. */
 #include <asm-generic/xor.h>
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES				\
+do {							\
+	xor_speed(&xor_block_8regs);			\
+	xor_speed(&xor_block_8regs_p);			\
+	xor_speed(&xor_block_32regs);			\
+	xor_speed(&xor_block_32regs_p);			\
+	XOR_SPEED_ALTIVEC();				\
+} while (0)
+
+#endif /* _ASM_POWERPC_XOR_H */
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 5310132856c1..95a20e17dbff 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -39,3 +39,6 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
 obj-y			+= code-patching.o
 obj-y			+= feature-fixups.o
 obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
+
+obj-$(CONFIG_ALTIVEC)	+= xor_vmx.o
+CFLAGS_xor_vmx.o += -maltivec -mabi=altivec
diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c
new file mode 100644
index 000000000000..e905f7c2ea7b
--- /dev/null
+++ b/arch/powerpc/lib/xor_vmx.c
@@ -0,0 +1,177 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <altivec.h>
+
+#include <linux/preempt.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/switch_to.h>
+
+typedef vector signed char unative_t;
+
+#define DEFINE(V)				\
+	unative_t *V = (unative_t *)V##_in;	\
+	unative_t V##_0, V##_1, V##_2, V##_3
+
+#define LOAD(V)			\
+	do {			\
+		V##_0 = V[0];	\
+		V##_1 = V[1];	\
+		V##_2 = V[2];	\
+		V##_3 = V[3];	\
+	} while (0)
+
+#define STORE(V)		\
+	do {			\
+		V[0] = V##_0;	\
+		V[1] = V##_1;	\
+		V[2] = V##_2;	\
+		V[3] = V##_3;	\
+	} while (0)
+
+#define XOR(V1, V2)					\
+	do {						\
+		V1##_0 = vec_xor(V1##_0, V2##_0);	\
+		V1##_1 = vec_xor(V1##_1, V2##_1);	\
+		V1##_2 = vec_xor(V1##_2, V2##_2);	\
+		V1##_3 = vec_xor(V1##_3, V2##_3);	\
+	} while (0)
+
+void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in)
+{
+	DEFINE(v1);
+	DEFINE(v2);
+	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+	preempt_disable();
+	enable_kernel_altivec();
+
+	do {
+		LOAD(v1);
+		LOAD(v2);
+		XOR(v1, v2);
+		STORE(v1);
+
+		v1 += 4;
+		v2 += 4;
+	} while (--lines > 0);
+
+	preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_2);
+
+void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in, unsigned long *v3_in)
+{
+	DEFINE(v1);
+	DEFINE(v2);
+	DEFINE(v3);
+	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+	preempt_disable();
+	enable_kernel_altivec();
+
+	do {
+		LOAD(v1);
+		LOAD(v2);
+		LOAD(v3);
+		XOR(v1, v2);
+		XOR(v1, v3);
+		STORE(v1);
+
+		v1 += 4;
+		v2 += 4;
+		v3 += 4;
+	} while (--lines > 0);
+
+	preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_3);
+
+void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in, unsigned long *v3_in,
+		   unsigned long *v4_in)
+{
+	DEFINE(v1);
+	DEFINE(v2);
+	DEFINE(v3);
+	DEFINE(v4);
+	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+	preempt_disable();
+	enable_kernel_altivec();
+
+	do {
+		LOAD(v1);
+		LOAD(v2);
+		LOAD(v3);
+		LOAD(v4);
+		XOR(v1, v2);
+		XOR(v3, v4);
+		XOR(v1, v3);
+		STORE(v1);
+
+		v1 += 4;
+		v2 += 4;
+		v3 += 4;
+		v4 += 4;
+	} while (--lines > 0);
+
+	preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_4);
+
+void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in, unsigned long *v3_in,
+		   unsigned long *v4_in, unsigned long *v5_in)
+{
+	DEFINE(v1);
+	DEFINE(v2);
+	DEFINE(v3);
+	DEFINE(v4);
+	DEFINE(v5);
+	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+	preempt_disable();
+	enable_kernel_altivec();
+
+	do {
+		LOAD(v1);
+		LOAD(v2);
+		LOAD(v3);
+		LOAD(v4);
+		LOAD(v5);
+		XOR(v1, v2);
+		XOR(v3, v4);
+		XOR(v1, v5);
+		XOR(v1, v3);
+		STORE(v1);
+
+		v1 += 4;
+		v2 += 4;
+		v3 += 4;
+		v4 += 4;
+		v5 += 4;
+	} while (--lines > 0);
+
+	preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_5);
-- 
cgit v1.2.3


From 6f68b5e2c6c04e9cf0e3074f884da36957ce9aae Mon Sep 17 00:00:00 2001
From: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
Date: Tue, 27 Aug 2013 15:09:52 +0530
Subject: powerpc/powernv: Create opal sysfs directory

Create /sys/firmware/opal directory. We will use this
interface to fetch opal error logs, perform firmware updates, etc.

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/opal.h       |  3 +++
 arch/powerpc/platforms/powernv/opal.c | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 4cc33ba1edd3..ee0efd2f2abc 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -601,6 +601,9 @@ typedef struct oppanel_line {
 	uint64_t 	line_len;
 } oppanel_line_t;
 
+/* /sys/firmware/opal */
+extern struct kobject *opal_kobj;
+
 /* API functions */
 int64_t opal_console_write(int64_t term_number, __be64 *length,
 			   const uint8_t *buffer);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 09336f0c54c5..37f06580709a 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -17,11 +17,15 @@
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
 #include <linux/slab.h>
+#include <linux/kobject.h>
 #include <asm/opal.h>
 #include <asm/firmware.h>
 
 #include "powernv.h"
 
+/* /sys/firmware/opal */
+struct kobject *opal_kobj;
+
 struct opal {
 	u64 base;
 	u64 entry;
@@ -375,6 +379,17 @@ static irqreturn_t opal_interrupt(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+static int opal_sysfs_init(void)
+{
+	opal_kobj = kobject_create_and_add("opal", firmware_kobj);
+	if (!opal_kobj) {
+		pr_warn("kobject_create_and_add opal failed\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 static int __init opal_init(void)
 {
 	struct device_node *np, *consoles;
@@ -420,6 +435,10 @@ static int __init opal_init(void)
 				   " (0x%x)\n", rc, irq, hwirq);
 		opal_irqs[i] = irq;
 	}
+
+	/* Create "opal" kobject under /sys/firmware */
+	rc = opal_sysfs_init();
+
 	return 0;
 }
 subsys_initcall(opal_init);
-- 
cgit v1.2.3


From 50bd6153d1a68354a0a0c8bca1fe949fa8875875 Mon Sep 17 00:00:00 2001
From: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
Date: Thu, 24 Oct 2013 16:04:58 +0530
Subject: powerpc/powernv: Code update interface

Code update interface for powernv platform. This provides
sysfs interface to pass new image, validate, update and
commit images.

This patch includes:
  - Below OPAL APIs for code update
    - opal_validate_flash()
    - opal_manage_flash()
    - opal_update_flash()

  - Create below sysfs files under /sys/firmware/opal
    - image		: Interface to pass new FW image
    - validate_flash	: Validate candidate image
    - manage_flash	: Commit/Reject operations
    - update_flash	: Flash new candidate image

Updating Image:
  "update_flash" is the interface used to request flashing of the new FW.
It just passes the image SG list to FW. The actual flashing is done
at system reboot time.

Note:
  - SG entry format:
    I have kept the version number so that this list stays similar to
    what PAPR defines.

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/opal.h                |   7 +
 arch/powerpc/platforms/powernv/Makefile        |   3 +-
 arch/powerpc/platforms/powernv/opal-flash.c    | 667 +++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/opal-wrappers.S |   3 +
 arch/powerpc/platforms/powernv/opal.c          |   4 +
 5 files changed, 683 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/platforms/powernv/opal-flash.c

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index ee0efd2f2abc..033c06be1d84 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -129,6 +129,9 @@ extern int opal_enter_rtas(struct rtas_args *args,
 #define OPAL_LPC_READ				67
 #define OPAL_LPC_WRITE				68
 #define OPAL_RETURN_CPU				69
+#define OPAL_FLASH_VALIDATE			76
+#define OPAL_FLASH_MANAGE			77
+#define OPAL_FLASH_UPDATE			78
 
 #ifndef __ASSEMBLY__
 
@@ -724,6 +727,9 @@ int64_t opal_lpc_write(uint32_t chip_id, enum OpalLPCAddressType addr_type,
 		       uint32_t addr, uint32_t data, uint32_t sz);
 int64_t opal_lpc_read(uint32_t chip_id, enum OpalLPCAddressType addr_type,
 		      uint32_t addr, uint32_t *data, uint32_t sz);
+int64_t opal_validate_flash(uint64_t buffer, uint32_t *size, uint32_t *result);
+int64_t opal_manage_flash(uint8_t op);
+int64_t opal_update_flash(uint64_t blk_list);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data);
@@ -752,6 +758,7 @@ extern int opal_set_rtc_time(struct rtc_time *tm);
 extern void opal_get_rtc_time(struct rtc_time *tm);
 extern unsigned long opal_get_boot_time(void);
 extern void opal_nvram_init(void);
+extern void opal_flash_init(void);
 
 extern int opal_machine_check(struct pt_regs *regs);
 
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 050d57e0c788..873fa1370dc4 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -1,5 +1,6 @@
 obj-y			+= setup.o opal-takeover.o opal-wrappers.o opal.o
-obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o rng.o
+obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
+obj-y			+= rng.o
 
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c
new file mode 100644
index 000000000000..6ffa6b1ec5b7
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-flash.c
@@ -0,0 +1,667 @@
+/*
+ * PowerNV OPAL Firmware Update Interface
+ *
+ * Copyright 2013 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define DEBUG
+
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+
+#include <asm/opal.h>
+
+/* FLASH status codes */
+#define FLASH_NO_OP		-1099	/* No operation initiated by user */
+#define FLASH_NO_AUTH		-9002	/* Not a service authority partition */
+
+/* Validate image status values */
+#define VALIDATE_IMG_READY	-1001	/* Image ready for validation */
+#define VALIDATE_IMG_INCOMPLETE	-1002	/* User copied < VALIDATE_BUF_SIZE */
+
+/* Manage image status values */
+#define MANAGE_ACTIVE_ERR	-9001	/* Cannot overwrite active img */
+
+/* Flash image status values */
+#define FLASH_IMG_READY		0	/* Img ready for flash on reboot */
+#define FLASH_INVALID_IMG	-1003	/* Flash image shorter than expected */
+#define FLASH_IMG_NULL_DATA	-1004	/* Bad data in sg list entry */
+#define FLASH_IMG_BAD_LEN	-1005	/* Bad length in sg list entry */
+
+/* Manage operation tokens */
+#define FLASH_REJECT_TMP_SIDE	0	/* Reject temporary fw image */
+#define FLASH_COMMIT_TMP_SIDE	1	/* Commit temporary fw image */
+
+/* Update tokens */
+#define FLASH_UPDATE_CANCEL	0	/* Cancel update request */
+#define FLASH_UPDATE_INIT	1	/* Initiate update */
+
+/* Validate image update result tokens */
+#define VALIDATE_TMP_UPDATE	0     /* T side will be updated */
+#define VALIDATE_FLASH_AUTH	1     /* Partition does not have authority */
+#define VALIDATE_INVALID_IMG	2     /* Candidate image is not valid */
+#define VALIDATE_CUR_UNKNOWN	3     /* Current fixpack level is unknown */
+/*
+ * Current T side will be committed to P side before being replaced with new
+ * image, and the new image is downlevel from current image
+ */
+#define VALIDATE_TMP_COMMIT_DL	4
+/*
+ * Current T side will be committed to P side before being replaced with new
+ * image
+ */
+#define VALIDATE_TMP_COMMIT	5
+/*
+ * T side will be updated with a downlevel image
+ */
+#define VALIDATE_TMP_UPDATE_DL	6
+/*
+ * The candidate image's release date is later than the system's firmware
+ * service entitlement date - service warranty period has expired
+ */
+#define VALIDATE_OUT_OF_WRNTY	7
+
+/* Validate buffer size */
+#define VALIDATE_BUF_SIZE	4096
+
+/* XXX: Assume candidate image size is <= 256MB */
+#define MAX_IMAGE_SIZE	0x10000000
+
+/* Flash sg list version */
+#define SG_LIST_VERSION (1UL)
+
+/* Image status */
+enum {
+	IMAGE_INVALID,
+	IMAGE_LOADING,
+	IMAGE_READY,
+};
+
+/* Candidate image data */
+struct image_data_t {
+	int		status;
+	void		*data;
+	uint32_t	size;
+};
+
+/* Candidate image header */
+struct image_header_t {
+	uint16_t	magic;
+	uint16_t	version;
+	uint32_t	size;
+};
+
+/* Scatter/gather entry */
+struct opal_sg_entry {
+	void	*data;
+	long	length;
+};
+
+/* We calculate number of entries based on PAGE_SIZE */
+#define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry))
+
+/*
+ * This struct is very similar but not identical to that
+ * needed by the opal flash update. All we need to do for
+ * opal is rewrite num_entries into a version/length and
+ * translate the pointers to absolute.
+ */
+struct opal_sg_list {
+	unsigned long num_entries;
+	struct opal_sg_list *next;
+	struct opal_sg_entry entry[SG_ENTRIES_PER_NODE];
+};
+
+struct validate_flash_t {
+	int		status;		/* Return status */
+	void		*buf;		/* Candidate image buffer */
+	uint32_t	buf_size;	/* Bytes of data held in buf */
+	uint32_t	result;		/* Update results token */
+};
+
+struct manage_flash_t {
+	int status;		/* Return status */
+};
+
+struct update_flash_t {
+	int status;		/* Return status */
+};
+
+static struct image_header_t	image_header;
+static struct image_data_t	image_data;
+static struct validate_flash_t	validate_flash_data;
+static struct manage_flash_t	manage_flash_data;
+static struct update_flash_t	update_flash_data;
+
+static DEFINE_MUTEX(image_data_mutex);
+
+/*
+ * Validate candidate image
+ */
+static inline void opal_flash_validate(void)
+{
+	struct validate_flash_t *args_buf = &validate_flash_data;
+
+	args_buf->status = opal_validate_flash(__pa(args_buf->buf),
+					       &(args_buf->buf_size),
+					       &(args_buf->result));
+}
+
+/*
+ * Validate output format:
+ *     validate result token
+ *     current image version details
+ *     new image version details
+ */
+static ssize_t validate_show(struct kobject *kobj,
+			     struct kobj_attribute *attr, char *buf)
+{
+	struct validate_flash_t *args_buf = &validate_flash_data;
+	int len;
+
+	/* Candidate image is not validated */
+	if (args_buf->status < VALIDATE_TMP_UPDATE) {
+		len = sprintf(buf, "%d\n", args_buf->status);
+		goto out;
+	}
+
+	/* Result token */
+	len = sprintf(buf, "%d\n", args_buf->result);
+
+	/* Current and candidate image version details */
+	if ((args_buf->result != VALIDATE_TMP_UPDATE) &&
+	    (args_buf->result < VALIDATE_CUR_UNKNOWN))
+		goto out;
+
+	if (args_buf->buf_size > (VALIDATE_BUF_SIZE - len)) {
+		memcpy(buf + len, args_buf->buf, VALIDATE_BUF_SIZE - len);
+		len = VALIDATE_BUF_SIZE;
+	} else {
+		memcpy(buf + len, args_buf->buf, args_buf->buf_size);
+		len += args_buf->buf_size;
+	}
+out:
+	/* Set status to default */
+	args_buf->status = FLASH_NO_OP;
+	return len;
+}
+
+/*
+ * Validate candidate firmware image
+ *
+ * Note:
+ *   We are only interested in first 4K bytes of the
+ *   candidate image.
+ */
+static ssize_t validate_store(struct kobject *kobj,
+			      struct kobj_attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct validate_flash_t *args_buf = &validate_flash_data;
+
+	if (buf[0] != '1')
+		return -EINVAL;
+
+	mutex_lock(&image_data_mutex);
+
+	if (image_data.status != IMAGE_READY ||
+	    image_data.size < VALIDATE_BUF_SIZE) {
+		args_buf->result = VALIDATE_INVALID_IMG;
+		args_buf->status = VALIDATE_IMG_INCOMPLETE;
+		goto out;
+	}
+
+	/* Copy first 4k bytes of candidate image */
+	memcpy(args_buf->buf, image_data.data, VALIDATE_BUF_SIZE);
+
+	args_buf->status = VALIDATE_IMG_READY;
+	args_buf->buf_size = VALIDATE_BUF_SIZE;
+
+	/* Validate candidate image */
+	opal_flash_validate();
+
+out:
+	mutex_unlock(&image_data_mutex);
+	return count;
+}
+
+/*
+ * Manage flash routine
+ */
+static inline void opal_flash_manage(uint8_t op)
+{
+	struct manage_flash_t *const args_buf = &manage_flash_data;
+
+	args_buf->status = opal_manage_flash(op);
+}
+
+/*
+ * Show manage flash status
+ */
+static ssize_t manage_show(struct kobject *kobj,
+			   struct kobj_attribute *attr, char *buf)
+{
+	struct manage_flash_t *const args_buf = &manage_flash_data;
+	int rc;
+
+	rc = sprintf(buf, "%d\n", args_buf->status);
+	/* Set status to default*/
+	args_buf->status = FLASH_NO_OP;
+	return rc;
+}
+
+/*
+ * Manage operations:
+ *   0 - Reject
+ *   1 - Commit
+ */
+static ssize_t manage_store(struct kobject *kobj,
+			    struct kobj_attribute *attr,
+			    const char *buf, size_t count)
+{
+	uint8_t op;
+	switch (buf[0]) {
+	case '0':
+		op = FLASH_REJECT_TMP_SIDE;
+		break;
+	case '1':
+		op = FLASH_COMMIT_TMP_SIDE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* commit/reject temporary image */
+	opal_flash_manage(op);
+	return count;
+}
+
+/*
+ * Free sg list
+ */
+static void free_sg_list(struct opal_sg_list *list)
+{
+	struct opal_sg_list *sg1;
+	while (list) {
+		sg1 = list->next;
+		kfree(list);
+		list = sg1;
+	}
+	list = NULL;
+}
+
+/*
+ * Build candidate image scatter gather list
+ *
+ * list format:
+ *   -----------------------------------
+ *  |  VER (8) | Entry length in bytes  |
+ *   -----------------------------------
+ *  |  Pointer to next entry            |
+ *   -----------------------------------
+ *  |  Address of memory area 1         |
+ *   -----------------------------------
+ *  |  Length of memory area 1          |
+ *   -----------------------------------
+ *  |   .........                       |
+ *   -----------------------------------
+ *  |   .........                       |
+ *   -----------------------------------
+ *  |  Address of memory area N         |
+ *   -----------------------------------
+ *  |  Length of memory area N          |
+ *   -----------------------------------
+ */
+static struct opal_sg_list *image_data_to_sglist(void)
+{
+	struct opal_sg_list *sg1, *list = NULL;
+	void *addr;
+	int size;
+
+	addr = image_data.data;
+	size = image_data.size;
+
+	sg1 = kzalloc((sizeof(struct opal_sg_list)), GFP_KERNEL);
+	if (!sg1)
+		return NULL;
+
+	list = sg1;
+	sg1->num_entries = 0;
+	while (size > 0) {
+		/* Translate virtual address to physical address */
+		sg1->entry[sg1->num_entries].data =
+			(void *)(vmalloc_to_pfn(addr) << PAGE_SHIFT);
+
+		if (size > PAGE_SIZE)
+			sg1->entry[sg1->num_entries].length = PAGE_SIZE;
+		else
+			sg1->entry[sg1->num_entries].length = size;
+
+		sg1->num_entries++;
+		if (sg1->num_entries >= SG_ENTRIES_PER_NODE) {
+			sg1->next = kzalloc((sizeof(struct opal_sg_list)),
+					    GFP_KERNEL);
+			if (!sg1->next) {
+				pr_err("%s : Failed to allocate memory\n",
+				       __func__);
+				goto nomem;
+			}
+
+			sg1 = sg1->next;
+			sg1->num_entries = 0;
+		}
+		addr += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+	return list;
+nomem:
+	free_sg_list(list);
+	return NULL;
+}
+
+/*
+ * Pass the image sg list (or 0 on cancel) to OPAL; OPAL_PARAMETER if no list
+ */
+static int opal_flash_update(int op)
+{
+	struct opal_sg_list *sg, *list, *next;
+	unsigned long addr;
+	int64_t rc = OPAL_PARAMETER;
+
+	if (op == FLASH_UPDATE_CANCEL) {
+		pr_alert("FLASH: Image update cancelled\n");
+		addr = 0;	/* no sg list: pass a zero address */
+		goto flash;
+	}
+
+	list = image_data_to_sglist();
+	if (!list)
+		goto invalid_img;
+
+	/* Physical address of the first sg list node */
+	addr = __pa(list);
+
+	/* Rewrite each node's next pointer as a physical address */
+	for (sg = list; sg; sg = next) {
+		next = sg->next;	/* keep the virtual pointer to walk on */
+		/* Don't translate NULL pointer for last entry */
+		if (sg->next)
+			sg->next = (struct opal_sg_list *)__pa(sg->next);
+		else
+			sg->next = NULL;
+
+		/* Make num_entries into the version/length field */
+		sg->num_entries = (SG_LIST_VERSION << 56) |
+			(sg->num_entries * sizeof(struct opal_sg_entry) + 16);
+	}
+
+	pr_alert("FLASH: Image is %u bytes\n", image_data.size);
+	pr_alert("FLASH: Image update requested\n");
+	pr_alert("FLASH: Image will be updated during system reboot\n");
+	pr_alert("FLASH: This will take several minutes. Do not power off!\n");
+
+flash:
+	rc = opal_update_flash(addr);
+
+invalid_img:
+	return rc;
+}
+
+/*
+ * Show candidate image status
+ */
+static ssize_t update_show(struct kobject *kobj,
+			   struct kobj_attribute *attr, char *buf)
+{
+	struct update_flash_t *const args_buf = &update_flash_data;
+	return sprintf(buf, "%d\n", args_buf->status);
+}
+
+/*
+ * Set update image flag
+ *  1 - Flash new image
+ *  0 - Cancel flash request
+ */
+static ssize_t update_store(struct kobject *kobj,
+			    struct kobj_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct update_flash_t *const args_buf = &update_flash_data;
+	int rc = count;
+
+	mutex_lock(&image_data_mutex);
+
+	switch (buf[0]) {
+	case '0':
+		if (args_buf->status == FLASH_IMG_READY)
+			opal_flash_update(FLASH_UPDATE_CANCEL);
+		args_buf->status = FLASH_NO_OP;
+		break;
+	case '1':
+		/* Image is loaded? */
+		if (image_data.status == IMAGE_READY)
+			args_buf->status =
+				opal_flash_update(FLASH_UPDATE_INIT);
+		else
+			args_buf->status = FLASH_INVALID_IMG;
+		break;
+	default:
+		rc = -EINVAL;
+	}
+
+	mutex_unlock(&image_data_mutex);
+	return rc;
+}
+
+/*
+ * Free image buffer
+ */
+static void free_image_buf(void)
+{
+	void *addr;
+	int size;
+
+	addr = image_data.data;
+	size = PAGE_ALIGN(image_data.size);
+	while (size > 0) {
+		ClearPageReserved(vmalloc_to_page(addr));
+		addr += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+	vfree(image_data.data);
+	image_data.data = NULL;
+	image_data.status = IMAGE_INVALID;
+}
+
+/*
+ * Allocate image buffer.
+ */
+static int alloc_image_buf(char *buffer, size_t count)
+{
+	void *addr;
+	int size;
+
+	if (count < sizeof(struct image_header_t)) {
+		pr_warn("FLASH: Invalid candidate image\n");
+		return -EINVAL;
+	}
+
+	memcpy(&image_header, (void *)buffer, sizeof(struct image_header_t));
+	image_data.size = be32_to_cpu(image_header.size);
+	pr_debug("FLASH: Candiate image size = %u\n", image_data.size);
+
+	if (image_data.size > MAX_IMAGE_SIZE) {
+		pr_warn("FLASH: Too large image\n");
+		return -EINVAL;
+	}
+	if (image_data.size < VALIDATE_BUF_SIZE) {
+		pr_warn("FLASH: Image is shorter than expected\n");
+		return -EINVAL;
+	}
+
+	image_data.data = vzalloc(PAGE_ALIGN(image_data.size));
+	if (!image_data.data) {
+		pr_err("%s : Failed to allocate memory\n", __func__);
+		return -ENOMEM;
+	}
+
+	/* Pin memory */
+	addr = image_data.data;
+	size = PAGE_ALIGN(image_data.size);
+	while (size > 0) {
+		SetPageReserved(vmalloc_to_page(addr));
+		addr += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+
+	image_data.status = IMAGE_LOADING;
+	return 0;
+}
+
+/*
+ * Copy candidate image
+ *
+ * Parse candidate image header to get total image size
+ * and pre-allocate required memory.
+ */
+static ssize_t image_data_write(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *bin_attr,
+				char *buffer, loff_t pos, size_t count)
+{
+	int rc;
+
+	mutex_lock(&image_data_mutex);
+
+	/* New image ? */
+	if (pos == 0) {
+		/* Free memory, if already allocated */
+		if (image_data.data)
+			free_image_buf();
+
+		/* Cancel outstanding image update request */
+		if (update_flash_data.status == FLASH_IMG_READY)
+			opal_flash_update(FLASH_UPDATE_CANCEL);
+
+		/* Allocate memory */
+		rc = alloc_image_buf(buffer, count);
+		if (rc)
+			goto out;
+	}
+
+	if (image_data.status != IMAGE_LOADING) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	if ((pos + count) > image_data.size) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	memcpy(image_data.data + pos, (void *)buffer, count);
+	rc = count;
+
+	/* Set image status */
+	if ((pos + count) == image_data.size) {
+		pr_debug("FLASH: Candidate image loaded....\n");
+		image_data.status = IMAGE_READY;
+	}
+
+out:
+	mutex_unlock(&image_data_mutex);
+	return rc;
+}
+
+/*
+ * sysfs interface :
+ *  OPAL uses below sysfs files for code update.
+ *  We create these files under /sys/firmware/opal.
+ *
+ *   image		: Interface to load candidate firmware image
+ *   validate_flash	: Validate firmware image
+ *   manage_flash	: Commit/Reject firmware image
+ *   update_flash	: Flash new firmware image
+ *
+ */
+static struct bin_attribute image_data_attr = {
+	.attr = {.name = "image", .mode = 0200},
+	.size = MAX_IMAGE_SIZE,	/* Limit image size */
+	.write = image_data_write,
+};
+
+static struct kobj_attribute validate_attribute =
+	__ATTR(validate_flash, 0600, validate_show, validate_store);
+
+static struct kobj_attribute manage_attribute =
+	__ATTR(manage_flash, 0600, manage_show, manage_store);
+
+static struct kobj_attribute update_attribute =
+	__ATTR(update_flash, 0600, update_show, update_store);
+
+static struct attribute *image_op_attrs[] = {
+	&validate_attribute.attr,
+	&manage_attribute.attr,
+	&update_attribute.attr,
+	NULL	/* need to NULL terminate the list of attributes */
+};
+
+static struct attribute_group image_op_attr_group = {
+	.attrs = image_op_attrs,
+};
+
+void __init opal_flash_init(void)
+{
+	int ret;
+
+	/* Allocate validate image buffer */
+	validate_flash_data.buf = kzalloc(VALIDATE_BUF_SIZE, GFP_KERNEL);
+	if (!validate_flash_data.buf) {
+		pr_err("%s : Failed to allocate memory\n", __func__);
+		return;
+	}
+
+	/* Make sure /sys/firmware/opal directory is created */
+	if (!opal_kobj) {
+		pr_warn("FLASH: opal kobject is not available\n");
+		goto nokobj;
+	}
+
+	/* Create the sysfs files */
+	ret = sysfs_create_group(opal_kobj, &image_op_attr_group);
+	if (ret) {
+		pr_warn("FLASH: Failed to create sysfs files\n");
+		goto nokobj;
+	}
+
+	ret = sysfs_create_bin_file(opal_kobj, &image_data_attr);
+	if (ret) {
+		pr_warn("FLASH: Failed to create sysfs files\n");
+		goto nosysfs_file;
+	}
+
+	/* Set default status */
+	validate_flash_data.status = FLASH_NO_OP;
+	manage_flash_data.status = FLASH_NO_OP;
+	update_flash_data.status = FLASH_NO_OP;
+	image_data.status = IMAGE_INVALID;
+	return;
+
+nosysfs_file:
+	sysfs_remove_group(opal_kobj, &image_op_attr_group);
+
+nokobj:
+	kfree(validate_flash_data.buf);
+	return;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 403d05840625..e7806504e976 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -123,3 +123,6 @@ OPAL_CALL(opal_xscom_write,			OPAL_XSCOM_WRITE);
 OPAL_CALL(opal_lpc_read,			OPAL_LPC_READ);
 OPAL_CALL(opal_lpc_write,			OPAL_LPC_WRITE);
 OPAL_CALL(opal_return_cpu,			OPAL_RETURN_CPU);
+OPAL_CALL(opal_validate_flash,			OPAL_FLASH_VALIDATE);
+OPAL_CALL(opal_manage_flash,			OPAL_FLASH_MANAGE);
+OPAL_CALL(opal_update_flash,			OPAL_FLASH_UPDATE);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 37f06580709a..b56c243aaee9 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -438,6 +438,10 @@ static int __init opal_init(void)
 
 	/* Create "opal" kobject under /sys/firmware */
 	rc = opal_sysfs_init();
+	if (rc == 0) {
+		/* Setup code update interface */
+		opal_flash_init();
+	}
 
 	return 0;
 }
-- 
cgit v1.2.3


From 9c662cad2fb66ff3a44b1d4f545bf496bf67ab10 Mon Sep 17 00:00:00 2001
From: Philippe Bergheaud <felix@linux.vnet.ibm.com>
Date: Tue, 24 Sep 2013 14:13:35 +0200
Subject: powerpc/bpf: BPF JIT compiler for 64-bit Little Endian

This enables the Berkeley Packet Filter JIT compiler
for the PowerPC running in 64bit Little Endian.

Signed-off-by: Philippe Bergheaud <felix@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/ppc-opcode.h |  1 +
 arch/powerpc/net/bpf_jit.h            | 10 ++++++++++
 arch/powerpc/net/bpf_jit_64.S         |  9 ++++++++-
 arch/powerpc/net/bpf_jit_comp.c       | 17 ++---------------
 4 files changed, 21 insertions(+), 16 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 442edee4b6aa..99f87906de17 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -203,6 +203,7 @@
 /* Misc instructions for BPF compiler */
 #define PPC_INST_LD			0xe8000000
 #define PPC_INST_LHZ			0xa0000000
+#define PPC_INST_LHBRX			0x7c00062c
 #define PPC_INST_LWZ			0x80000000
 #define PPC_INST_STD			0xf8000000
 #define PPC_INST_STDU			0xf8000001
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 8a5dfaf5c6b7..0baf2b826e25 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -92,6 +92,8 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
 				     ___PPC_RA(base) | IMM_L(i))
 #define PPC_LHZ(r, base, i)	EMIT(PPC_INST_LHZ | ___PPC_RT(r) |	      \
 				     ___PPC_RA(base) | IMM_L(i))
+#define PPC_LHBRX(r, base, b)	EMIT(PPC_INST_LHBRX | ___PPC_RT(r) |	      \
+				     ___PPC_RA(base) | ___PPC_RB(b))
 /* Convenience helpers for the above with 'far' offsets: */
 #define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) PPC_LD(r, base, i);     \
 		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
@@ -186,6 +188,14 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
 				PPC_ORI(d, d, (uintptr_t)(i) & 0xffff);	      \
 		} } while (0);
 
+#define PPC_LHBRX_OFFS(r, base, i) \
+		do { PPC_LI32(r, i); PPC_LHBRX(r, r, base); } while(0)
+#ifdef __LITTLE_ENDIAN__
+#define PPC_NTOHS_OFFS(r, base, i)	PPC_LHBRX_OFFS(r, base, i)
+#else
+#define PPC_NTOHS_OFFS(r, base, i)	PPC_LHZ_OFFS(r, base, i)
+#endif
+
 static inline bool is_nearbranch(int offset)
 {
 	return (offset < 32768) && (offset >= -32768);
diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
index 7d3a3b5619a2..e76eba74d9da 100644
--- a/arch/powerpc/net/bpf_jit_64.S
+++ b/arch/powerpc/net/bpf_jit_64.S
@@ -43,8 +43,11 @@ sk_load_word_positive_offset:
 	cmpd	r_scratch1, r_addr
 	blt	bpf_slow_path_word
 	/* Nope, just hitting the header.  cr0 here is eq or gt! */
+#ifdef __LITTLE_ENDIAN__
+	lwbrx	r_A, r_D, r_addr
+#else
 	lwzx	r_A, r_D, r_addr
-	/* When big endian we don't need to byteswap. */
+#endif
 	blr	/* Return success, cr0 != LT */
 
 	.globl	sk_load_half
@@ -56,7 +59,11 @@ sk_load_half_positive_offset:
 	subi	r_scratch1, r_HL, 2
 	cmpd	r_scratch1, r_addr
 	blt	bpf_slow_path_half
+#ifdef __LITTLE_ENDIAN__
+	lhbrx	r_A, r_D, r_addr
+#else
 	lhzx	r_A, r_D, r_addr
+#endif
 	blr
 
 	.globl	sk_load_byte
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index bf56e33f8257..81cd6c79babe 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -17,14 +17,8 @@
 
 #include "bpf_jit.h"
 
-#ifndef __BIG_ENDIAN
-/* There are endianness assumptions herein. */
-#error "Little-endian PPC not supported in BPF compiler"
-#endif
-
 int bpf_jit_enable __read_mostly;
 
-
 static inline void bpf_flush_icache(void *start, void *end)
 {
 	smp_wmb();
@@ -346,18 +340,11 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			break;
 
 			/*** Ancillary info loads ***/
-
-			/* None of the BPF_S_ANC* codes appear to be passed by
-			 * sk_chk_filter().  The interpreter and the x86 BPF
-			 * compiler implement them so we do too -- they may be
-			 * planted in future.
-			 */
 		case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
 						  protocol) != 2);
-			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
-							  protocol));
-			/* ntohs is a NOP with BE loads. */
+			PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff,
+							    protocol));
 			break;
 		case BPF_S_ANC_IFINDEX:
 			PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
-- 
cgit v1.2.3


From d0cebfa650a084f041131207d81f9b311babd5ef Mon Sep 17 00:00:00 2001
From: Philippe Bergheaud <felix@linux.vnet.ibm.com>
Date: Thu, 26 Sep 2013 08:30:09 +0200
Subject: powerpc: word-at-a-time optimization for 64-bit Little Endian

This is an optimization for the PowerPC in 64-bit
little-endian. Bit counting is used in find_zero(), instead
of the multiply and shift.

It is modelled after Alan Modra's PowerPC LE strlen patch
http://sourceware.org/ml/libc-alpha/2013-08/msg00097.html.

Signed-off-by: Philippe Bergheaud <felix@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/word-at-a-time.h | 57 +++++++++++++++++--------------
 1 file changed, 32 insertions(+), 25 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
index 213a5f2b0717..9a5c928bb3c6 100644
--- a/arch/powerpc/include/asm/word-at-a-time.h
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -42,13 +42,6 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct
 
 #else
 
-/*
- * This is largely generic for little-endian machines, but the
- * optimal byte mask counting is probably going to be something
- * that is architecture-specific. If you have a reliably fast
- * bit count instruction, that might be better than the multiply
- * and shift, for example.
- */
 struct word_at_a_time {
 	const unsigned long one_bits, high_bits;
 };
@@ -57,19 +50,32 @@ struct word_at_a_time {
 
 #ifdef CONFIG_64BIT
 
-/*
- * Jan Achrenius on G+: microoptimized version of
- * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
- * that works for the bytemasks without having to
- * mask them first.
- */
-static inline long count_masked_bytes(unsigned long mask)
+/* Alan Modra's little-endian strlen tail for 64-bit */
+#define create_zero_mask(mask) (mask)
+
+static inline unsigned long find_zero(unsigned long mask)
 {
-	return mask*0x0001020304050608ul >> 56;
+	unsigned long leading_zero_bits;
+	long trailing_zero_bit_mask;
+
+	asm ("addi %1,%2,-1\n\t"
+	     "andc %1,%1,%2\n\t"
+	     "popcntd %0,%1"
+	     : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask)
+	     : "r" (mask));
+	return leading_zero_bits >> 3;
 }
 
 #else	/* 32-bit case */
 
+/*
+ * This is largely generic for little-endian machines, but the
+ * optimal byte mask counting is probably going to be something
+ * that is architecture-specific. If you have a reliably fast
+ * bit count instruction, that might be better than the multiply
+ * and shift, for example.
+ */
+
 /* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
 static inline long count_masked_bytes(long mask)
 {
@@ -79,6 +85,17 @@ static inline long count_masked_bytes(long mask)
 	return a & mask;
 }
 
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+	bits = (bits - 1) & ~bits;
+	return bits >> 7;
+}
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+	return count_masked_bytes(mask);
+}
+
 #endif
 
 /* Return nonzero if it has a zero */
@@ -94,19 +111,9 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits,
 	return bits;
 }
 
-static inline unsigned long create_zero_mask(unsigned long bits)
-{
-	bits = (bits - 1) & ~bits;
-	return bits >> 7;
-}
-
 /* The mask we created is directly usable as a bytemask */
 #define zero_bytemask(mask) (mask)
 
-static inline unsigned long find_zero(unsigned long mask)
-{
-	return count_masked_bytes(mask);
-}
 #endif
 
 #endif /* _ASM_WORD_AT_A_TIME_H */
-- 
cgit v1.2.3


From a3e31b4588443f37d82195096c6b30dff1c152c2 Mon Sep 17 00:00:00 2001
From: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
Date: Wed, 18 Sep 2013 11:53:05 +0100
Subject: of: Move definition of of_find_next_cache_node into common code.

Since the definition of of_find_next_cache_node is architecture independent,
the existing definition in powerpc can be moved to drivers/of/base.c.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Rob Herring <rob.herring@calxeda.com>
Signed-off-by: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/prom.h |  3 ---
 arch/powerpc/kernel/prom.c      | 31 -------------------------------
 drivers/of/base.c               | 31 +++++++++++++++++++++++++++++++
 include/linux/of.h              |  2 ++
 4 files changed, 33 insertions(+), 34 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 7d0c7f3a7171..bf09e5a065b8 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -44,9 +44,6 @@ void of_parse_dma_window(struct device_node *dn, const __be32 *dma_window,
 
 extern void kdump_move_device_tree(void);
 
-/* cache lookup */
-struct device_node *of_find_next_cache_node(struct device_node *np);
-
 #ifdef CONFIG_NUMA
 extern int of_node_to_nid(struct device_node *device);
 #else
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 09be2759c314..4432fd86a6d2 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -760,37 +760,6 @@ void __init early_init_devtree(void *params)
  *
  *******/
 
-/**
- *	of_find_next_cache_node - Find a node's subsidiary cache
- *	@np:	node of type "cpu" or "cache"
- *
- *	Returns a node pointer with refcount incremented, use
- *	of_node_put() on it when done.  Caller should hold a reference
- *	to np.
- */
-struct device_node *of_find_next_cache_node(struct device_node *np)
-{
-	struct device_node *child;
-	const phandle *handle;
-
-	handle = of_get_property(np, "l2-cache", NULL);
-	if (!handle)
-		handle = of_get_property(np, "next-level-cache", NULL);
-
-	if (handle)
-		return of_find_node_by_phandle(be32_to_cpup(handle));
-
-	/* OF on pmac has nodes instead of properties named "l2-cache"
-	 * beneath CPU nodes.
-	 */
-	if (!strcmp(np->type, "cpu"))
-		for_each_child_of_node(np, child)
-			if (!strcmp(child->type, "cache"))
-				return child;
-
-	return NULL;
-}
-
 /**
  * of_get_ibm_chip_id - Returns the IBM "chip-id" of a device
  * @np: device node of the device
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 865d3f66c86b..b2cee3db5ceb 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1884,3 +1884,34 @@ int of_device_is_stdout_path(struct device_node *dn)
 	return of_stdout == dn;
 }
 EXPORT_SYMBOL_GPL(of_device_is_stdout_path);
+
+/**
+ *	of_find_next_cache_node - Find a node's subsidiary cache
+ *	@np:	node of type "cpu" or "cache"
+ *
+ *	Returns a node pointer with refcount incremented, use
+ *	of_node_put() on it when done.  Caller should hold a reference
+ *	to np.
+ */
+struct device_node *of_find_next_cache_node(const struct device_node *np)
+{
+	struct device_node *child;
+	const phandle *handle;
+
+	handle = of_get_property(np, "l2-cache", NULL);
+	if (!handle)
+		handle = of_get_property(np, "next-level-cache", NULL);
+
+	if (handle)
+		return of_find_node_by_phandle(be32_to_cpup(handle));
+
+	/* OF on pmac has nodes instead of properties named "l2-cache"
+	 * beneath CPU nodes.
+	 */
+	if (!strcmp(np->type, "cpu"))
+		for_each_child_of_node(np, child)
+			if (!strcmp(child->type, "cache"))
+				return child;
+
+	return NULL;
+}
diff --git a/include/linux/of.h b/include/linux/of.h
index f95aee391e30..c08c07e249b3 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -226,6 +226,8 @@ static inline int of_get_child_count(const struct device_node *np)
 	return num;
 }
 
+/* cache lookup */
+extern struct device_node *of_find_next_cache_node(const struct device_node *);
 extern struct device_node *of_find_node_with_property(
 	struct device_node *from, const char *prop_name);
 #define for_each_node_with_property(dn, prop_name) \
-- 
cgit v1.2.3


From a40a2b670706494610d794927b9aebe77e18af8d Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <murzin.v@gmail.com>
Date: Sat, 28 Sep 2013 10:22:00 +0200
Subject: powerpc/bpf: Fix DIVWU instruction opcode

Currently DIVWU stands for *signed* divw opcode:

7d 2a 4b 96 	divwu   r9,r10,r9
7d 2a 4b d6 	divw    r9,r10,r9

Use the *unsigned* divw opcode for DIVWU.

Suggested-by: Vassili Karpov <av1474@comtv.ru>
Reviewed-by: Vassili Karpov <av1474@comtv.ru>
Signed-off-by: Vladimir Murzin <murzin.v@gmail.com>
Acked-by: Matt Evans <matt@ozlabs.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/ppc-opcode.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 99f87906de17..3132bb9365f3 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -222,7 +222,7 @@
 #define PPC_INST_MULLW			0x7c0001d6
 #define PPC_INST_MULHWU			0x7c000016
 #define PPC_INST_MULLI			0x1c000000
-#define PPC_INST_DIVWU			0x7c0003d6
+#define PPC_INST_DIVWU			0x7c000396
 #define PPC_INST_RLWINM			0x54000000
 #define PPC_INST_RLDICR			0x78000004
 #define PPC_INST_SLW			0x7c000030
-- 
cgit v1.2.3


From d7a88c7eb46acb486922822eec3224c0bcab29dc Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 10 Oct 2013 19:18:02 +1100
Subject: powerpc/scom: Enable 64-bit addresses

On P8, XSCOM addresses have a special "indirect" form that
requires more than 32-bits, so let's use u64 everywhere in
the code instead of u32.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/scom.h             | 8 ++++----
 arch/powerpc/platforms/powernv/opal-xscom.c | 6 +++---
 arch/powerpc/platforms/wsp/scom_wsp.c       | 4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/scom.h b/arch/powerpc/include/asm/scom.h
index 07dcdcfdaefc..f5cde45b1161 100644
--- a/arch/powerpc/include/asm/scom.h
+++ b/arch/powerpc/include/asm/scom.h
@@ -54,8 +54,8 @@ struct scom_controller {
 	scom_map_t (*map)(struct device_node *ctrl_dev, u64 reg, u64 count);
 	void (*unmap)(scom_map_t map);
 
-	int (*read)(scom_map_t map, u32 reg, u64 *value);
-	int (*write)(scom_map_t map, u32 reg, u64 value);
+	int (*read)(scom_map_t map, u64 reg, u64 *value);
+	int (*write)(scom_map_t map, u64 reg, u64 value);
 };
 
 extern const struct scom_controller *scom_controller;
@@ -137,7 +137,7 @@ static inline void scom_unmap(scom_map_t map)
  *
  * Returns 0 (success) or a negative error code
  */
-static inline int scom_read(scom_map_t map, u32 reg, u64 *value)
+static inline int scom_read(scom_map_t map, u64 reg, u64 *value)
 {
 	int rc;
 
@@ -155,7 +155,7 @@ static inline int scom_read(scom_map_t map, u32 reg, u64 *value)
  *
  * Returns 0 (success) or a negative error code
  */
-static inline int scom_write(scom_map_t map, u32 reg, u64 value)
+static inline int scom_write(scom_map_t map, u64 reg, u64 value)
 {
 	return scom_controller->write(map, reg, value);
 }
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 3ed5c6498324..09a90d8897ba 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -27,7 +27,7 @@
  */
 struct opal_scom_map {
 	uint32_t chip;
-	uint32_t addr;
+	uint64_t addr;
 };
 
 static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count)
@@ -71,7 +71,7 @@ static int opal_xscom_err_xlate(int64_t rc)
 	}
 }
 
-static int opal_scom_read(scom_map_t map, u32 reg, u64 *value)
+static int opal_scom_read(scom_map_t map, u64 reg, u64 *value)
 {
 	struct opal_scom_map *m = map;
 	int64_t rc;
@@ -80,7 +80,7 @@ static int opal_scom_read(scom_map_t map, u32 reg, u64 *value)
 	return opal_xscom_err_xlate(rc);
 }
 
-static int opal_scom_write(scom_map_t map, u32 reg, u64 value)
+static int opal_scom_write(scom_map_t map, u64 reg, u64 value)
 {
 	struct opal_scom_map *m = map;
 	int64_t rc;
diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c
index 54172c4a8a64..8928507affea 100644
--- a/arch/powerpc/platforms/wsp/scom_wsp.c
+++ b/arch/powerpc/platforms/wsp/scom_wsp.c
@@ -50,7 +50,7 @@ static void wsp_scom_unmap(scom_map_t map)
 	iounmap((void *)map);
 }
 
-static int wsp_scom_read(scom_map_t map, u32 reg, u64 *value)
+static int wsp_scom_read(scom_map_t map, u64 reg, u64 *value)
 {
 	u64 __iomem *addr = (u64 __iomem *)map;
 
@@ -59,7 +59,7 @@ static int wsp_scom_read(scom_map_t map, u32 reg, u64 *value)
 	return 0;
 }
 
-static int wsp_scom_write(scom_map_t map, u32 reg, u64 value)
+static int wsp_scom_write(scom_map_t map, u64 reg, u64 value)
 {
 	u64 __iomem *addr = (u64 __iomem *)map;
 
-- 
cgit v1.2.3


From 0c4888ef1d8a8b82c29075ce7e257ff795af15c7 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 5 Nov 2013 16:33:22 +1100
Subject: powerpc: Fix fatal SLB miss when restoring PPR

When restoring the PPR value, we incorrectly access the thread structure
at a time when MSR:RI is clear, which means we cannot recover from nested
faults. However, the thread structure isn't covered by the "bolted" SLB
entries and thus accessing it can fault.

This fixes it by splitting the code so that the PPR value is loaded into
a GPR before MSR:RI is cleared.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/ppc_asm.h |  7 -------
 arch/powerpc/kernel/entry_64.S     | 10 +++++++++-
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 8deaaad3b32f..3c1acc31a092 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -406,13 +406,6 @@ BEGIN_FTR_SECTION_NESTED(945)						\
 	std	ra,TASKTHREADPPR(rb);					\
 END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,945)
 
-#define RESTORE_PPR(ra, rb)						\
-BEGIN_FTR_SECTION_NESTED(946)						\
-	ld	ra,PACACURRENT(r13);					\
-	ld	rb,TASKTHREADPPR(ra);					\
-	mtspr	SPRN_PPR,rb;	/* Restore PPR */			\
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,946)
-
 #endif
 
 /*
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 12679cd43e0c..bbfb0294b354 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -818,6 +818,12 @@ fast_exception_return:
 	andi.	r0,r3,MSR_RI
 	beq-	unrecov_restore
 
+	/* Load PPR from thread struct before we clear MSR:RI */
+BEGIN_FTR_SECTION
+	ld	r2,PACACURRENT(r13)
+	ld	r2,TASKTHREADPPR(r2)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
 	/*
 	 * Clear RI before restoring r13.  If we are returning to
 	 * userspace and we take an exception after restoring r13,
@@ -838,8 +844,10 @@ fast_exception_return:
 	 */
 	andi.	r0,r3,MSR_PR
 	beq	1f
+BEGIN_FTR_SECTION
+	mtspr	SPRN_PPR,r2	/* Restore PPR */
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ACCOUNT_CPU_USER_EXIT(r2, r4)
-	RESTORE_PPR(r2, r4)
 	REST_GPR(13, r1)
 1:
 	mtspr	SPRN_SRR1,r3
-- 
cgit v1.2.3


From 3820b4d2789f5166afdb136bb14f93166e6cfbc2 Mon Sep 17 00:00:00 2001
From: "David A. Long" <dave.long@linaro.org>
Date: Tue, 15 Oct 2013 17:04:16 -0400
Subject: uprobes: Move function declarations out of arch

Move the function declarations from the arch headers to the common
header, since only the function bodies are architecture-specific.
These changes are from Rabin Vincent's uprobes patch.

[ oleg: update arch/powerpc/include/asm/uprobes.h ]

Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: David A. Long <dave.long@linaro.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/powerpc/include/asm/uprobes.h | 7 -------
 arch/x86/include/asm/uprobes.h     | 7 -------
 include/linux/uprobes.h            | 8 ++++++++
 3 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h
index 23016020915e..b6fc3178372a 100644
--- a/arch/powerpc/include/asm/uprobes.h
+++ b/arch/powerpc/include/asm/uprobes.h
@@ -45,11 +45,4 @@ struct arch_uprobe_task {
 	unsigned long	saved_trap_nr;
 };
 
-extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
-extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
-extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
-extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #endif	/* _ASM_UPROBES_H */
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 6e5197910fd8..b20b4d68b934 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -49,11 +49,4 @@ struct arch_uprobe_task {
 	unsigned int			saved_tf;
 };
 
-extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
-extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
-extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
-extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #endif	/* _ASM_UPROBES_H */
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 9e0d5a6fe7a8..28473e3f6068 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -30,6 +30,7 @@
 struct vm_area_struct;
 struct mm_struct;
 struct inode;
+struct notifier_block;
 
 #ifdef CONFIG_ARCH_SUPPORTS_UPROBES
 # include <asm/uprobes.h>
@@ -125,6 +126,13 @@ extern void uprobe_notify_resume(struct pt_regs *regs);
 extern bool uprobe_deny_signal(void);
 extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
 extern void uprobe_clear_state(struct mm_struct *mm);
+extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
+extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
+extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
+extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
-- 
cgit v1.2.3


From 8a8de66c4f6ebd0f6d3da026ec24339aa5d1db12 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 4 Nov 2013 20:27:13 +0100
Subject: uprobes: Introduce arch_uprobe->ixol

Currently xol_get_insn_slot() assumes that we should simply copy
arch_uprobe->insn[] which is (ignoring arch_uprobe_analyze_insn)
just the copy of the original insn.

This is not true for arm which needs to create another insn to
execute it out-of-line.

So this patch simply adds the new member, ->ixol into the union.
This doesn't make any difference for x86 and powerpc, but arm
can divorce insn/ixol and initialize the correct xol insn in
arch_uprobe_analyze_insn().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/powerpc/include/asm/uprobes.h | 1 +
 arch/x86/include/asm/uprobes.h     | 5 ++++-
 kernel/events/uprobes.c            | 3 ++-
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h
index b6fc3178372a..75c6ecdb8f37 100644
--- a/arch/powerpc/include/asm/uprobes.h
+++ b/arch/powerpc/include/asm/uprobes.h
@@ -37,6 +37,7 @@ typedef ppc_opcode_t uprobe_opcode_t;
 struct arch_uprobe {
 	union {
 		u8	insn[MAX_UINSN_BYTES];
+		u8	ixol[MAX_UINSN_BYTES];
 		u32	ainsn;
 	};
 };
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index b20b4d68b934..3087ea9c5f2e 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -35,7 +35,10 @@ typedef u8 uprobe_opcode_t;
 
 struct arch_uprobe {
 	u16				fixups;
-	u8				insn[MAX_UINSN_BYTES];
+	union {
+		u8			insn[MAX_UINSN_BYTES];
+		u8			ixol[MAX_UINSN_BYTES];
+	};
 #ifdef CONFIG_X86_64
 	unsigned long			rip_rela_target_address;
 #endif
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 0012c8ebb098..fbcff61b5099 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1264,7 +1264,8 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 		return 0;
 
 	/* Initialize the slot */
-	copy_to_page(area->page, xol_vaddr, uprobe->arch.insn, MAX_UINSN_BYTES);
+	copy_to_page(area->page, xol_vaddr,
+			uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
 	/*
 	 * We probably need flush_icache_user_range() but it needs vma.
 	 * This should work on supported architectures too.
-- 
cgit v1.2.3


From ebff90b288c347f3af1b3d164c258aeb2bed60ec Mon Sep 17 00:00:00 2001
From: Prabhakar Kushwaha <prabhakar@freescale.com>
Date: Tue, 24 Sep 2013 16:41:23 +0530
Subject: driver/mtd/IFC: Add support of 8K page size NAND flash

The current IFC driver supports NAND flash with page sizes up to 4K.
Add support for 8K page size NAND flash:
  - Add nand_ecclayout for 4-bit & 8-bit ECC
  - Define constants
  - Also fix ecc.strength for 8-bit ECC of 8K page size NAND

Signed-off-by: Prabhakar Kushwaha <prabhakar@freescale.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 arch/powerpc/include/asm/fsl_ifc.h |  2 +
 drivers/mtd/nand/fsl_ifc_nand.c    | 77 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/fsl_ifc.h b/arch/powerpc/include/asm/fsl_ifc.h
index b8a4b9bc50b3..f49ddb1b2273 100644
--- a/arch/powerpc/include/asm/fsl_ifc.h
+++ b/arch/powerpc/include/asm/fsl_ifc.h
@@ -93,6 +93,7 @@
 #define CSOR_NAND_PGS_512		0x00000000
 #define CSOR_NAND_PGS_2K		0x00080000
 #define CSOR_NAND_PGS_4K		0x00100000
+#define CSOR_NAND_PGS_8K		0x00180000
 /* Spare region Size */
 #define CSOR_NAND_SPRZ_MASK		0x0000E000
 #define CSOR_NAND_SPRZ_SHIFT		13
@@ -102,6 +103,7 @@
 #define CSOR_NAND_SPRZ_210		0x00006000
 #define CSOR_NAND_SPRZ_218		0x00008000
 #define CSOR_NAND_SPRZ_224		0x0000A000
+#define CSOR_NAND_SPRZ_CSOR_EXT		0x0000C000
 /* Pages Per Block */
 #define CSOR_NAND_PB_MASK		0x00000700
 #define CSOR_NAND_PB_SHIFT		8
diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
index 1c173d91cbea..9d1cf005878f 100644
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c
@@ -135,6 +135,69 @@ static struct nand_ecclayout oob_4096_ecc8 = {
 	.oobfree = { {2, 6}, {136, 82} },
 };
 
+/* 8192-byte page size with 4-bit ECC */
+static struct nand_ecclayout oob_8192_ecc4 = {
+	.eccbytes = 128,
+	.eccpos = {
+		8, 9, 10, 11, 12, 13, 14, 15,
+		16, 17, 18, 19, 20, 21, 22, 23,
+		24, 25, 26, 27, 28, 29, 30, 31,
+		32, 33, 34, 35, 36, 37, 38, 39,
+		40, 41, 42, 43, 44, 45, 46, 47,
+		48, 49, 50, 51, 52, 53, 54, 55,
+		56, 57, 58, 59, 60, 61, 62, 63,
+		64, 65, 66, 67, 68, 69, 70, 71,
+		72, 73, 74, 75, 76, 77, 78, 79,
+		80, 81, 82, 83, 84, 85, 86, 87,
+		88, 89, 90, 91, 92, 93, 94, 95,
+		96, 97, 98, 99, 100, 101, 102, 103,
+		104, 105, 106, 107, 108, 109, 110, 111,
+		112, 113, 114, 115, 116, 117, 118, 119,
+		120, 121, 122, 123, 124, 125, 126, 127,
+		128, 129, 130, 131, 132, 133, 134, 135,
+	},
+	.oobfree = { {2, 6}, {136, 208} },
+};
+
+/* 8192-byte page size with 8-bit ECC -- requires 218-byte OOB */
+static struct nand_ecclayout oob_8192_ecc8 = {
+	.eccbytes = 256,
+	.eccpos = {
+		8, 9, 10, 11, 12, 13, 14, 15,
+		16, 17, 18, 19, 20, 21, 22, 23,
+		24, 25, 26, 27, 28, 29, 30, 31,
+		32, 33, 34, 35, 36, 37, 38, 39,
+		40, 41, 42, 43, 44, 45, 46, 47,
+		48, 49, 50, 51, 52, 53, 54, 55,
+		56, 57, 58, 59, 60, 61, 62, 63,
+		64, 65, 66, 67, 68, 69, 70, 71,
+		72, 73, 74, 75, 76, 77, 78, 79,
+		80, 81, 82, 83, 84, 85, 86, 87,
+		88, 89, 90, 91, 92, 93, 94, 95,
+		96, 97, 98, 99, 100, 101, 102, 103,
+		104, 105, 106, 107, 108, 109, 110, 111,
+		112, 113, 114, 115, 116, 117, 118, 119,
+		120, 121, 122, 123, 124, 125, 126, 127,
+		128, 129, 130, 131, 132, 133, 134, 135,
+		136, 137, 138, 139, 140, 141, 142, 143,
+		144, 145, 146, 147, 148, 149, 150, 151,
+		152, 153, 154, 155, 156, 157, 158, 159,
+		160, 161, 162, 163, 164, 165, 166, 167,
+		168, 169, 170, 171, 172, 173, 174, 175,
+		176, 177, 178, 179, 180, 181, 182, 183,
+		184, 185, 186, 187, 188, 189, 190, 191,
+		192, 193, 194, 195, 196, 197, 198, 199,
+		200, 201, 202, 203, 204, 205, 206, 207,
+		208, 209, 210, 211, 212, 213, 214, 215,
+		216, 217, 218, 219, 220, 221, 222, 223,
+		224, 225, 226, 227, 228, 229, 230, 231,
+		232, 233, 234, 235, 236, 237, 238, 239,
+		240, 241, 242, 243, 244, 245, 246, 247,
+		248, 249, 250, 251, 252, 253, 254, 255,
+		256, 257, 258, 259, 260, 261, 262, 263,
+	},
+	.oobfree = { {2, 6}, {264, 80} },
+};
 
 /*
  * Generic flash bbt descriptors
@@ -870,11 +933,25 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 		} else {
 			layout = &oob_4096_ecc8;
 			chip->ecc.bytes = 16;
+			chip->ecc.strength = 8;
 		}
 
 		priv->bufnum_mask = 1;
 		break;
 
+	case CSOR_NAND_PGS_8K:
+		if ((csor & CSOR_NAND_ECC_MODE_MASK) ==
+		    CSOR_NAND_ECC_MODE_4) {
+			layout = &oob_8192_ecc4;
+		} else {
+			layout = &oob_8192_ecc8;
+			chip->ecc.bytes = 16;
+			chip->ecc.strength = 8;
+		}
+
+		priv->bufnum_mask = 0;
+	break;
+
 	default:
 		dev_err(priv->dev, "bad csor %#x: bad page size\n", csor);
 		return -ENODEV;
-- 
cgit v1.2.3


From cdc3d5627d5f7c4e6b6372b9fb39cba0fe6a9b2a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Oct 2013 22:24:29 -0400
Subject: switch elf_coredump_extra_notes_write() to dump_emit()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/include/asm/spu.h               |  3 +-
 arch/powerpc/platforms/cell/spu_syscalls.c   |  5 ++--
 arch/powerpc/platforms/cell/spufs/coredump.c | 44 +++++++++++-----------------
 arch/powerpc/platforms/cell/spufs/spufs.h    |  3 +-
 fs/binfmt_elf.c                              |  7 ++---
 include/linux/elf.h                          |  6 ++--
 6 files changed, 30 insertions(+), 38 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h
index 93f280e23279..37b7ca39ec9f 100644
--- a/arch/powerpc/include/asm/spu.h
+++ b/arch/powerpc/include/asm/spu.h
@@ -235,6 +235,7 @@ extern long spu_sys_callback(struct spu_syscall_block *s);
 
 /* syscalls implemented in spufs */
 struct file;
+struct coredump_params;
 struct spufs_calls {
 	long (*create_thread)(const char __user *name,
 					unsigned int flags, umode_t mode,
@@ -242,7 +243,7 @@ struct spufs_calls {
 	long (*spu_run)(struct file *filp, __u32 __user *unpc,
 						__u32 __user *ustatus);
 	int (*coredump_extra_notes_size)(void);
-	int (*coredump_extra_notes_write)(struct file *file, loff_t *foffset);
+	int (*coredump_extra_notes_write)(struct coredump_params *cprm);
 	void (*notify_spus_active)(void);
 	struct module *owner;
 };
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c
index db4e638cf408..3844f1397fc3 100644
--- a/arch/powerpc/platforms/cell/spu_syscalls.c
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/rcupdate.h>
+#include <linux/binfmts.h>
 
 #include <asm/spu.h>
 
@@ -126,7 +127,7 @@ int elf_coredump_extra_notes_size(void)
 	return ret;
 }
 
-int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset)
+int elf_coredump_extra_notes_write(struct coredump_params *cprm)
 {
 	struct spufs_calls *calls;
 	int ret;
@@ -135,7 +136,7 @@ int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset)
 	if (!calls)
 		return 0;
 
-	ret = calls->coredump_extra_notes_write(file, foffset);
+	ret = calls->coredump_extra_notes_write(cprm);
 
 	spufs_calls_put(calls);
 
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index c9500ea7be2f..5d9b0a288f36 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -27,6 +27,8 @@
 #include <linux/gfp.h>
 #include <linux/list.h>
 #include <linux/syscalls.h>
+#include <linux/coredump.h>
+#include <linux/binfmts.h>
 
 #include <asm/uaccess.h>
 
@@ -52,35 +54,24 @@ static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer,
  * These are the only things you should do on a core-file: use only these
  * functions to write out all the necessary info.
  */
-static int spufs_dump_write(struct file *file, const void *addr, int nr, loff_t *foffset)
+static int spufs_dump_write(struct coredump_params *cprm, const void *addr, int nr)
 {
-	unsigned long limit = rlimit(RLIMIT_CORE);
-	ssize_t written;
-
-	if (*foffset + nr > limit)
+	if (!dump_emit(cprm, addr, nr))
 		return -EIO;
-
-	written = file->f_op->write(file, addr, nr, &file->f_pos);
-	*foffset += written;
-
-	if (written != nr)
-		return -EIO;
-
 	return 0;
 }
 
-static int spufs_dump_align(struct file *file, char *buf, loff_t new_off,
-			    loff_t *foffset)
+static int spufs_dump_align(struct coredump_params *cprm, char *buf, loff_t new_off)
 {
 	int rc, size;
 
-	size = min((loff_t)PAGE_SIZE, new_off - *foffset);
+	size = min((loff_t)PAGE_SIZE, new_off - cprm->written);
 	memset(buf, 0, size);
 
 	rc = 0;
-	while (rc == 0 && new_off > *foffset) {
-		size = min((loff_t)PAGE_SIZE, new_off - *foffset);
-		rc = spufs_dump_write(file, buf, size, foffset);
+	while (rc == 0 && new_off > cprm->written) {
+		size = min((loff_t)PAGE_SIZE, new_off - cprm->written);
+		rc = spufs_dump_write(cprm, buf, size);
 	}
 
 	return rc;
@@ -165,7 +156,7 @@ int spufs_coredump_extra_notes_size(void)
 }
 
 static int spufs_arch_write_note(struct spu_context *ctx, int i,
-				  struct file *file, int dfd, loff_t *foffset)
+				  struct coredump_params *cprm, int dfd)
 {
 	loff_t pos = 0;
 	int sz, rc, nread, total = 0;
@@ -186,22 +177,22 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
 	en.n_descsz = sz;
 	en.n_type = NT_SPU;
 
-	rc = spufs_dump_write(file, &en, sizeof(en), foffset);
+	rc = spufs_dump_write(cprm, &en, sizeof(en));
 	if (rc)
 		goto out;
 
-	rc = spufs_dump_write(file, fullname, en.n_namesz, foffset);
+	rc = spufs_dump_write(cprm, fullname, en.n_namesz);
 	if (rc)
 		goto out;
 
-	rc = spufs_dump_align(file, buf, roundup(*foffset, 4), foffset);
+	rc = spufs_dump_align(cprm, buf, roundup(cprm->written, 4));
 	if (rc)
 		goto out;
 
 	do {
 		nread = do_coredump_read(i, ctx, buf, bufsz, &pos);
 		if (nread > 0) {
-			rc = spufs_dump_write(file, buf, nread, foffset);
+			rc = spufs_dump_write(cprm, buf, nread);
 			if (rc)
 				goto out;
 			total += nread;
@@ -213,15 +204,14 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
 		goto out;
 	}
 
-	rc = spufs_dump_align(file, buf, roundup(*foffset - total + sz, 4),
-			      foffset);
+	rc = spufs_dump_align(cprm, buf, roundup(cprm->written - total + sz, 4));
 
 out:
 	free_page((unsigned long)buf);
 	return rc;
 }
 
-int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset)
+int spufs_coredump_extra_notes_write(struct coredump_params *cprm)
 {
 	struct spu_context *ctx;
 	int fd, j, rc;
@@ -233,7 +223,7 @@ int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset)
 			return rc;
 
 		for (j = 0; spufs_coredump_read[j].name != NULL; j++) {
-			rc = spufs_arch_write_note(ctx, j, file, fd, foffset);
+			rc = spufs_arch_write_note(ctx, j, cprm, fd);
 			if (rc) {
 				spu_release_saved(ctx);
 				return rc;
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 67852ade4c01..0ba3c9598358 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -247,12 +247,13 @@ extern const struct spufs_tree_descr spufs_dir_debug_contents[];
 
 /* system call implementation */
 extern struct spufs_calls spufs_calls;
+struct coredump_params;
 long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
 long spufs_create(struct path *nd, struct dentry *dentry, unsigned int flags,
 			umode_t mode, struct file *filp);
 /* ELF coredump callbacks for writing SPU ELF notes */
 extern int spufs_coredump_extra_notes_size(void);
-extern int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset);
+extern int spufs_coredump_extra_notes_write(struct coredump_params *cprm);
 
 extern const struct file_operations spufs_context_fops;
 
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index ddab40d3d828..3bf75d767a4a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2037,7 +2037,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	size_t size = 0;
 	struct vm_area_struct *vma, *gate_vma;
 	struct elfhdr *elf = NULL;
-	loff_t offset = 0, dataoff, foffset;
+	loff_t offset = 0, dataoff;
 	struct elf_note_info info = { };
 	struct elf_phdr *phdr4note = NULL;
 	struct elf_shdr *shdr4extnum = NULL;
@@ -2160,12 +2160,11 @@ static int elf_core_dump(struct coredump_params *cprm)
 	if (!write_note_info(&info, cprm))
 		goto end_coredump;
 
-	foffset = cprm->written;
-	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
+	if (elf_coredump_extra_notes_write(cprm))
 		goto end_coredump;
 
 	/* Align to page */
-	if (!dump_seek(cprm->file, dataoff - foffset))
+	if (!dump_seek(cprm->file, dataoff - cprm->written))
 		goto end_coredump;
 
 	cprm->written = size;
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 40a3c0e01b2b..67a5fa7830c4 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -39,13 +39,13 @@ extern Elf64_Dyn _DYNAMIC [];
 
 /* Optional callbacks to write extra ELF notes. */
 struct file;
+struct coredump_params;
 
 #ifndef ARCH_HAVE_EXTRA_ELF_NOTES
 static inline int elf_coredump_extra_notes_size(void) { return 0; }
-static inline int elf_coredump_extra_notes_write(struct file *file,
-			loff_t *foffset) { return 0; }
+static inline int elf_coredump_extra_notes_write(struct coredump_params *cprm) { return 0; }
 #else
 extern int elf_coredump_extra_notes_size(void);
-extern int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset);
+extern int elf_coredump_extra_notes_write(struct coredump_params *cprm);
 #endif
 #endif /* _LINUX_ELF_H */
-- 
cgit v1.2.3


From 00d1a39e69d5afa7523dad515a05b21abd17c389 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 17 Sep 2013 18:53:09 +0000
Subject: preempt: Make PREEMPT_ACTIVE generic

There is no point in having this bit defined by each architecture.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20130917183629.090698799@linutronix.de
---
 arch/alpha/include/asm/thread_info.h      |  2 --
 arch/arc/include/asm/thread_info.h        |  2 --
 arch/arm/include/asm/thread_info.h        |  6 ------
 arch/arm64/include/asm/thread_info.h      |  6 ------
 arch/avr32/include/asm/thread_info.h      |  2 --
 arch/blackfin/include/asm/thread_info.h   |  2 --
 arch/c6x/include/asm/thread_info.h        |  2 --
 arch/cris/include/asm/thread_info.h       |  2 --
 arch/frv/include/asm/thread_info.h        |  2 --
 arch/hexagon/include/asm/thread_info.h    |  4 ----
 arch/ia64/include/asm/thread_info.h       |  3 ---
 arch/m32r/include/asm/thread_info.h       |  2 --
 arch/m68k/include/asm/thread_info.h       |  2 --
 arch/metag/include/asm/thread_info.h      |  2 --
 arch/microblaze/include/asm/thread_info.h |  2 --
 arch/mips/include/asm/thread_info.h       |  2 --
 arch/mn10300/include/asm/thread_info.h    |  2 --
 arch/parisc/include/asm/thread_info.h     |  3 ---
 arch/powerpc/include/asm/thread_info.h    |  2 --
 arch/s390/include/asm/thread_info.h       |  2 --
 arch/score/include/asm/thread_info.h      |  2 --
 arch/sh/include/asm/thread_info.h         |  2 --
 arch/sh/kernel/entry-common.S             |  6 ++----
 arch/sparc/include/asm/thread_info_32.h   |  2 --
 arch/sparc/include/asm/thread_info_64.h   |  2 --
 arch/tile/include/asm/thread_info.h       |  2 --
 arch/um/include/asm/thread_info.h         |  2 --
 arch/unicore32/include/asm/thread_info.h  |  6 ------
 arch/x86/include/asm/thread_info.h        |  2 --
 arch/xtensa/include/asm/thread_info.h     |  2 --
 include/linux/preempt_mask.h              | 15 +++++----------
 include/linux/sched.h                     |  2 +-
 32 files changed, 8 insertions(+), 89 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 52cd2a4a3ff4..453597b91f3a 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -58,8 +58,6 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define THREAD_SIZE_ORDER 1
 #define THREAD_SIZE (2*PAGE_SIZE)
 
-#define PREEMPT_ACTIVE		0x40000000
-
 /*
  * Thread information flags:
  * - these are process state flags and used from assembly
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index 2d50a4cdd7f3..45be21672011 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -80,8 +80,6 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void)
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE      0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may need to
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index df5e13d64f2c..71a06b293489 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -140,12 +140,6 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 				    struct user_vfp_exc __user *);
 #endif
 
-/*
- * We use bit 30 of the preempt_count to indicate that kernel
- * preemption is occurring.  See <asm/hardirq.h>.
- */
-#define PREEMPT_ACTIVE	0x40000000
-
 /*
  * thread information flags:
  *  TIF_SYSCALL_TRACE	- syscall trace active
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 23a3c4791d86..720e70b66ffd 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -88,12 +88,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif
 
-/*
- * We use bit 30 of the preempt_count to indicate that kernel
- * preemption is occurring.  See <asm/hardirq.h>.
- */
-#define PREEMPT_ACTIVE	0x40000000
-
 /*
  * thread information flags:
  *  TIF_SYSCALL_TRACE	- syscall trace active
diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h
index 6dc62e1f94c7..a978f3fe7c25 100644
--- a/arch/avr32/include/asm/thread_info.h
+++ b/arch/avr32/include/asm/thread_info.h
@@ -66,8 +66,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x40000000
-
 /*
  * Thread information flags
  * - these are process state flags that various assembly files may need to access
diff --git a/arch/blackfin/include/asm/thread_info.h b/arch/blackfin/include/asm/thread_info.h
index 3894005337ba..55f473bdad36 100644
--- a/arch/blackfin/include/asm/thread_info.h
+++ b/arch/blackfin/include/asm/thread_info.h
@@ -88,8 +88,6 @@ static inline struct thread_info *current_thread_info(void)
 #define TI_CPU		12
 #define TI_PREEMPT	16
 
-#define	PREEMPT_ACTIVE	0x4000000
-
 /*
  * thread information flag bit numbers
  */
diff --git a/arch/c6x/include/asm/thread_info.h b/arch/c6x/include/asm/thread_info.h
index 4c8dc562bd90..d4e9ef87076d 100644
--- a/arch/c6x/include/asm/thread_info.h
+++ b/arch/c6x/include/asm/thread_info.h
@@ -84,8 +84,6 @@ struct thread_info *current_thread_info(void)
 #define put_thread_info(ti)	put_task_struct((ti)->task)
 #endif /* __ASSEMBLY__ */
 
-#define	PREEMPT_ACTIVE	0x10000000
-
 /*
  * thread information flag bit numbers
  * - pending work-to-be-done flags are in LSW
diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h
index 07c8c40c52b3..55dede18c032 100644
--- a/arch/cris/include/asm/thread_info.h
+++ b/arch/cris/include/asm/thread_info.h
@@ -44,8 +44,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * macros/functions for gaining access to the thread information structure
  */
diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h
index bebd7eadc772..af29e17c0181 100644
--- a/arch/frv/include/asm/thread_info.h
+++ b/arch/frv/include/asm/thread_info.h
@@ -52,8 +52,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * macros/functions for gaining access to the thread information structure
  */
diff --git a/arch/hexagon/include/asm/thread_info.h b/arch/hexagon/include/asm/thread_info.h
index f7c32406a711..a59dad3b3695 100644
--- a/arch/hexagon/include/asm/thread_info.h
+++ b/arch/hexagon/include/asm/thread_info.h
@@ -73,10 +73,6 @@ struct thread_info {
 
 #endif  /* __ASSEMBLY__  */
 
-/*  looks like "linux/hardirq.h" uses this.  */
-
-#define PREEMPT_ACTIVE		0x10000000
-
 #ifndef __ASSEMBLY__
 
 #define INIT_THREAD_INFO(tsk)                   \
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index cade13dd0299..5957cf61f898 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -11,9 +11,6 @@
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 
-#define PREEMPT_ACTIVE_BIT 30
-#define PREEMPT_ACTIVE	(1 << PREEMPT_ACTIVE_BIT)
-
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h
index c074f4c2e858..00171703402f 100644
--- a/arch/m32r/include/asm/thread_info.h
+++ b/arch/m32r/include/asm/thread_info.h
@@ -53,8 +53,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #define THREAD_SIZE		(PAGE_SIZE << 1)
 #define THREAD_SIZE_ORDER	1
 /*
diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h
index 126131f94a2c..21a4784ca5a1 100644
--- a/arch/m68k/include/asm/thread_info.h
+++ b/arch/m68k/include/asm/thread_info.h
@@ -35,8 +35,6 @@ struct thread_info {
 };
 #endif /* __ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x4000000
-
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
diff --git a/arch/metag/include/asm/thread_info.h b/arch/metag/include/asm/thread_info.h
index 7c4a33006142..b19e9c588a16 100644
--- a/arch/metag/include/asm/thread_info.h
+++ b/arch/metag/include/asm/thread_info.h
@@ -46,8 +46,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #ifdef CONFIG_4KSTACKS
 #define THREAD_SHIFT		12
 #else
diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h
index de26ea6373de..8c9d36591a03 100644
--- a/arch/microblaze/include/asm/thread_info.h
+++ b/arch/microblaze/include/asm/thread_info.h
@@ -106,8 +106,6 @@ static inline struct thread_info *current_thread_info(void)
 /* thread information allocation */
 #endif /* __ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index f9b24bfbdbae..4f58ef6d0eed 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -92,8 +92,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #define STACK_WARN	(THREAD_SIZE / 8)
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may need to
diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h
index 224b4262486d..bf280eaccd36 100644
--- a/arch/mn10300/include/asm/thread_info.h
+++ b/arch/mn10300/include/asm/thread_info.h
@@ -16,8 +16,6 @@
 
 #include <asm/page.h>
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #ifdef CONFIG_4KSTACKS
 #define THREAD_SIZE		(4096)
 #define THREAD_SIZE_ORDER	(0)
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index bc7cf120106b..d5f97ea3a4e1 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -46,9 +46,6 @@ struct thread_info {
 #define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
 #define THREAD_SHIFT            (PAGE_SHIFT + THREAD_SIZE_ORDER)
 
-#define PREEMPT_ACTIVE_BIT	28
-#define PREEMPT_ACTIVE		(1 << PREEMPT_ACTIVE_BIT)
-
 /*
  * thread information flags
  */
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index ba7b1973866e..8fd6cf6dcee8 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -82,8 +82,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif /* __ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * thread information flag bit numbers
  */
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index eb5f64d26d06..10e0fcd3633d 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -111,6 +111,4 @@ static inline struct thread_info *current_thread_info(void)
 #define is_32bit_task()		(1)
 #endif
 
-#define PREEMPT_ACTIVE		0x4000000
-
 #endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/score/include/asm/thread_info.h b/arch/score/include/asm/thread_info.h
index 1425cc034872..656b7ada9326 100644
--- a/arch/score/include/asm/thread_info.h
+++ b/arch/score/include/asm/thread_info.h
@@ -72,8 +72,6 @@ register struct thread_info *__current_thread_info __asm__("r28");
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * thread information flags
  * - these are process state flags that various assembly files may need to
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index 45a93669289d..ad27ffa65e2e 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -41,8 +41,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #if defined(CONFIG_4KSTACKS)
 #define THREAD_SHIFT	12
 #else
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index 9b6e4beeb296..ca46834294b7 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -108,7 +108,7 @@ need_resched:
 	and	#(0xf0>>1), r0		! interrupts off (exception path)?
 	cmp/eq	#(0xf0>>1), r0
 	bt	noresched
-	mov.l	3f, r0
+	mov.l	1f, r0
 	jsr	@r0			! call preempt_schedule_irq
 	 nop
 	bra	need_resched
@@ -119,9 +119,7 @@ noresched:
 	 nop
 
 	.align 2
-1:	.long	PREEMPT_ACTIVE
-2:	.long	schedule
-3:	.long	preempt_schedule_irq
+1:	.long	preempt_schedule_irq
 #endif
 
 ENTRY(resume_userspace)
diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h
index dd3807599bb9..96efa7adc223 100644
--- a/arch/sparc/include/asm/thread_info_32.h
+++ b/arch/sparc/include/asm/thread_info_32.h
@@ -105,8 +105,6 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define TI_W_SAVED	0x250
 /* #define TI_RESTART_BLOCK 0x25n */ /* Nobody cares */
 
-#define PREEMPT_ACTIVE		0x4000000
-
 /*
  * thread information flag bit numbers
  */
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index d5e504251079..2b4e17b79e9c 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -111,8 +111,6 @@ struct thread_info {
 #define THREAD_SHIFT PAGE_SHIFT
 #endif /* PAGE_SHIFT == 13 */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * macros/functions for gaining access to the thread information structure
  */
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index b8aa6df3e102..729aa107f64e 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -113,8 +113,6 @@ extern void _cpu_idle(void);
 
 #endif /* !__ASSEMBLY__ */
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * Thread information flags that various assembly files may need to access.
  * Keep flags accessed frequently in low bits, particular since it makes
diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h
index 2c8eeb2df8b4..1c5b2a83046a 100644
--- a/arch/um/include/asm/thread_info.h
+++ b/arch/um/include/asm/thread_info.h
@@ -60,8 +60,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
 #define TIF_SIGPENDING		1	/* signal pending */
 #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
diff --git a/arch/unicore32/include/asm/thread_info.h b/arch/unicore32/include/asm/thread_info.h
index 818b4a1edb5b..af36d8eabdf1 100644
--- a/arch/unicore32/include/asm/thread_info.h
+++ b/arch/unicore32/include/asm/thread_info.h
@@ -117,12 +117,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif
 
-/*
- * We use bit 30 of the preempt_count to indicate that kernel
- * preemption is occurring.  See <asm/hardirq.h>.
- */
-#define PREEMPT_ACTIVE	0x40000000
-
 /*
  * thread information flags:
  *  TIF_SYSCALL_TRACE	- syscall trace active
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c46a46be1ec6..3ba3de457d05 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -153,8 +153,6 @@ struct thread_info {
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #ifdef CONFIG_X86_32
 
 #define STACK_WARN	(THREAD_SIZE/8)
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index 9481004ac119..470153e8547c 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -76,8 +76,6 @@ struct thread_info {
 
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 /*
  * macros/functions for gaining access to the thread information structure
  */
diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h
index 810d7e386f20..d169820203dd 100644
--- a/include/linux/preempt_mask.h
+++ b/include/linux/preempt_mask.h
@@ -17,10 +17,11 @@
  * there are a few palaeontologic drivers which reenable interrupts in
  * the handler, so we need more than one bit here.
  *
- * PREEMPT_MASK: 0x000000ff
- * SOFTIRQ_MASK: 0x0000ff00
- * HARDIRQ_MASK: 0x000f0000
- *     NMI_MASK: 0x00100000
+ * PREEMPT_MASK:	0x000000ff
+ * SOFTIRQ_MASK:	0x0000ff00
+ * HARDIRQ_MASK:	0x000f0000
+ *     NMI_MASK:	0x00100000
+ * PREEMPT_ACTIVE:	0x00200000
  */
 #define PREEMPT_BITS	8
 #define SOFTIRQ_BITS	8
@@ -46,15 +47,9 @@
 
 #define SOFTIRQ_DISABLE_OFFSET	(2 * SOFTIRQ_OFFSET)
 
-#ifndef PREEMPT_ACTIVE
 #define PREEMPT_ACTIVE_BITS	1
 #define PREEMPT_ACTIVE_SHIFT	(NMI_SHIFT + NMI_BITS)
 #define PREEMPT_ACTIVE	(__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
-#endif
-
-#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
-#error PREEMPT_ACTIVE is too low!
-#endif
 
 #define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
 #define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 045b0d227846..55080df48b70 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -22,7 +22,7 @@ struct sched_param {
 #include <linux/errno.h>
 #include <linux/nodemask.h>
 #include <linux/mm_types.h>
-#include <linux/preempt.h>
+#include <linux/preempt_mask.h>
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
-- 
cgit v1.2.3


From 4f804943f99454ac79e0f448428447f1a72d09fc Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 14 Nov 2013 14:31:38 -0800
Subject: powerpc: handle pgtable_page_ctor() fail

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/include/asm/pgalloc-64.h | 5 ++++-
 arch/powerpc/mm/pgtable_32.c          | 5 ++++-
 arch/powerpc/mm/pgtable_64.c          | 7 ++++---
 3 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
index f65e27b09bd3..16cb92d215d2 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -91,7 +91,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 	if (!pte)
 		return NULL;
 	page = virt_to_page(pte);
-	pgtable_page_ctor(page);
+	if (!pgtable_page_ctor(page)) {
+		__free_page(page);
+		return NULL;
+	}
 	return page;
 }
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 6c856fb8c15b..5b9601715289 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -121,7 +121,10 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	ptepage = alloc_pages(flags, 0);
 	if (!ptepage)
 		return NULL;
-	pgtable_page_ctor(ptepage);
+	if (!pgtable_page_ctor(ptepage)) {
+		__free_page(ptepage);
+		return NULL;
+	}
 	return ptepage;
 }
 
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 536eec72c0f7..9d95786aa80f 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -378,6 +378,10 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
 				       __GFP_REPEAT | __GFP_ZERO);
 	if (!page)
 		return NULL;
+	if (!kernel && !pgtable_page_ctor(page)) {
+		__free_page(page);
+		return NULL;
+	}
 
 	ret = page_address(page);
 	spin_lock(&mm->page_table_lock);
@@ -392,9 +396,6 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
 	}
 	spin_unlock(&mm->page_table_lock);
 
-	if (!kernel)
-		pgtable_page_ctor(page);
-
 	return (pte_t *)ret;
 }
 
-- 
cgit v1.2.3


From e844b1eeae42dc93bf13e67812a95ee7b58be8c7 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Wed, 20 Nov 2013 22:14:59 +1100
Subject: pseries: Add H_SET_MODE to change exception endianness

On little endian builds, call H_SET_MODE so exceptions are taken with
the correct endianness. We need to reset the endianness during kexec,
so do that in the MMU hashtable clear callback.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/hvcall.h         |  2 ++
 arch/powerpc/include/asm/plpar_wrappers.h | 26 +++++++++++++++++++
 arch/powerpc/platforms/pseries/lpar.c     | 17 +++++++++++++
 arch/powerpc/platforms/pseries/setup.c    | 42 +++++++++++++++++++++++++++++++
 4 files changed, 87 insertions(+)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 0c7f2bfcf134..d8b600b3f058 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -403,6 +403,8 @@ static inline unsigned long cmo_get_page_size(void)
 extern long pSeries_enable_reloc_on_exc(void);
 extern long pSeries_disable_reloc_on_exc(void);
 
+extern long pseries_big_endian_exceptions(void);
+
 #else
 
 #define pSeries_enable_reloc_on_exc()  do {} while (0)
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index a63b045e707c..12c32c5f533d 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -287,6 +287,32 @@ static inline long disable_reloc_on_exceptions(void) {
 	return plpar_set_mode(0, 3, 0, 0);
 }
 
+/*
+ * Take exceptions in big endian mode on this partition
+ *
+ * Note: this call has a partition wide scope and can take a while to complete.
+ * If it returns H_LONG_BUSY_* it should be retried periodically until it
+ * returns H_SUCCESS.
+ */
+static inline long enable_big_endian_exceptions(void)
+{
+	/* mflags = 0: big endian exceptions */
+	return plpar_set_mode(0, 4, 0, 0);
+}
+
+/*
+ * Take exceptions in little endian mode on this partition
+ *
+ * Note: this call has a partition wide scope and can take a while to complete.
+ * If it returns H_LONG_BUSY_* it should be retried periodically until it
+ * returns H_SUCCESS.
+ */
+static inline long enable_little_endian_exceptions(void)
+{
+	/* mflags = 1: little endian exceptions */
+	return plpar_set_mode(1, 4, 0, 0);
+}
+
 static inline long plapr_set_ciabr(unsigned long ciabr)
 {
 	return plpar_set_mode(0, 1, ciabr, 0);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 356bc75ca74f..4fca3def9db9 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -245,6 +245,23 @@ static void pSeries_lpar_hptab_clear(void)
 					&(ptes[j].pteh), &(ptes[j].ptel));
 		}
 	}
+
+#ifdef __LITTLE_ENDIAN__
+	/* Reset exceptions to big endian */
+	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+		long rc;
+
+		rc = pseries_big_endian_exceptions();
+		/*
+		 * At this point it is unlikely panic() will get anything
+		 * out to the user, but at least this will stop us from
+		 * continuing on further and creating an even more
+		 * difficult to debug situation.
+		 */
+		if (rc)
+			panic("Could not enable big endian exceptions");
+	}
+#endif
 }
 
 /*
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 1f97e2b87a62..c1f190858701 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -442,6 +442,32 @@ static void pSeries_machine_kexec(struct kimage *image)
 }
 #endif
 
+#ifdef __LITTLE_ENDIAN__
+long pseries_big_endian_exceptions(void)
+{
+	long rc;
+
+	while (1) {
+		rc = enable_big_endian_exceptions();
+		if (!H_IS_LONG_BUSY(rc))
+			return rc;
+		mdelay(get_longbusy_msecs(rc));
+	}
+}
+
+static long pseries_little_endian_exceptions(void)
+{
+	long rc;
+
+	while (1) {
+		rc = enable_little_endian_exceptions();
+		if (!H_IS_LONG_BUSY(rc))
+			return rc;
+		mdelay(get_longbusy_msecs(rc));
+	}
+}
+#endif
+
 static void __init pSeries_setup_arch(void)
 {
 	panic_timeout = 10;
@@ -698,6 +724,22 @@ static int __init pSeries_probe(void)
 	/* Now try to figure out if we are running on LPAR */
 	of_scan_flat_dt(pseries_probe_fw_features, NULL);
 
+#ifdef __LITTLE_ENDIAN__
+	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+		long rc;
+		/*
+		 * Tell the hypervisor that we want our exceptions to
+		 * be taken in little endian mode. If this fails we don't
+		 * want to use BUG() because it will trigger an exception.
+		 */
+		rc = pseries_little_endian_exceptions();
+		if (rc) {
+			ppc_md.progress("H_SET_MODE LE exception fail", 0);
+			panic("Could not enable little endian exceptions");
+		}
+	}
+#endif
+
 	if (firmware_has_feature(FW_FEATURE_LPAR))
 		hpte_init_lpar();
 	else
-- 
cgit v1.2.3


From 373c76d62240cd3c1a3af42440b0a73cb7296bda Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 20 Nov 2013 22:15:00 +1100
Subject: powerpc: Add TIF_ELF2ABI flag.

Little endian ppc64 is getting an exciting new ABI.  This is reflected
by the bottom two bits of e_flags in the ELF header:

	0 == legacy binaries (v1 ABI)
	1 == binaries using the old ABI (compiled with a new toolchain)
	2 == binaries using the new ABI.

We store this in a thread flag, because we need to set it in core
dumps and for signal delivery.  Our chief concern is that the new ABI
doesn't use function descriptors.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/elf.h         | 2 ++
 arch/powerpc/include/asm/thread_info.h | 9 +++++++++
 2 files changed, 11 insertions(+)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index cc0655a702a7..6d0e2369fe81 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -86,6 +86,8 @@ typedef elf_vrregset_t elf_fpxregset_t;
 #ifdef __powerpc64__
 # define SET_PERSONALITY(ex)					\
 do {								\
+	if (((ex).e_flags & 0x3) == 2)				\
+		set_thread_flag(TIF_ELF2ABI);			\
 	if ((ex).e_ident[EI_CLASS] == ELFCLASS32)		\
 		set_thread_flag(TIF_32BIT);			\
 	else							\
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index ba7b1973866e..05a3030b4582 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -107,6 +107,9 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_EMULATE_STACK_STORE	16	/* Is an instruction emulation
 						for stack store? */
 #define TIF_MEMDIE		17	/* is terminating due to OOM killer */
+#if defined(CONFIG_PPC64)
+#define TIF_ELF2ABI		18	/* function descriptors must die! */
+#endif
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -185,6 +188,12 @@ static inline bool test_thread_local_flags(unsigned int flags)
 #define is_32bit_task()	(1)
 #endif
 
+#if defined(CONFIG_PPC64)
+#define is_elf2_task() (test_thread_flag(TIF_ELF2ABI))
+#else
+#define is_elf2_task() (0)
+#endif
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
-- 
cgit v1.2.3


From 918d03552e031dcbdaa2a22da3c2abff5952c38a Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 20 Nov 2013 22:15:01 +1100
Subject: powerpc: Set eflags correctly for ELF ABIv2 core dumps.

We leave it at zero (though it could be 1) for old tasks.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/elf.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index 6d0e2369fe81..935b5e7a1436 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -31,6 +31,8 @@
 extern unsigned long randomize_et_dyn(unsigned long base);
 #define ELF_ET_DYN_BASE		(randomize_et_dyn(0x20000000))
 
+#define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0)
+
 /*
  * Our registers are always unsigned longs, whether we're a 32 bit
  * process or 64 bit, on either a 64 bit or 32 bit kernel.
-- 
cgit v1.2.3


From 3eb906c6b6c123513718e7742a96a4189f900382 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 20 Nov 2013 11:05:01 +1100
Subject: powerpc: Make cpu_to_chip_id() available when SMP=n

Up until now we have only used cpu_to_chip_id() in the topology code,
which is only used on SMP builds. However my recent commit a4da0d5
"Implement arch_get_random_long/int() for powernv" added a usage when
SMP=n, breaking the build.

Move cpu_to_chip_id() into prom.c so it is available for SMP=n builds.

We would move the extern to prom.h, but that breaks the include in
topology.h. Instead we leave it in smp.h, but move it out of the
CONFIG_SMP #ifdef. We also need to include asm/smp.h in rng.c, because
the linux version skips asm/smp.h on UP. What a mess.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/smp.h       |  2 +-
 arch/powerpc/kernel/prom.c           | 20 ++++++++++++++++++++
 arch/powerpc/kernel/smp.c            | 16 ----------------
 arch/powerpc/platforms/powernv/rng.c |  1 +
 4 files changed, 22 insertions(+), 17 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 98da78e0c2c0..084e0807db98 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -33,6 +33,7 @@ extern int boot_cpuid;
 extern int spinning_secondaries;
 
 extern void cpu_die(void);
+extern int cpu_to_chip_id(int cpu);
 
 #ifdef CONFIG_SMP
 
@@ -112,7 +113,6 @@ static inline struct cpumask *cpu_core_mask(int cpu)
 }
 
 extern int cpu_to_core_id(int cpu);
-extern int cpu_to_chip_id(int cpu);
 
 /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
  *
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index f3a47098fb8e..fa0ad8aafbcc 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -777,6 +777,26 @@ int of_get_ibm_chip_id(struct device_node *np)
 	return -1;
 }
 
+/**
+ * cpu_to_chip_id - Return the cpu's chip-id
+ * @cpu: The logical cpu number.
+ *
+ * Return the value of the ibm,chip-id property corresponding to the given
+ * logical cpu number. If the chip-id can not be found, returns -1.
+ */
+int cpu_to_chip_id(int cpu)
+{
+	struct device_node *np = of_get_cpu_node(cpu, NULL);
+	int chip_id;
+
+	if (!np)
+		return -1;
+	chip_id = of_get_ibm_chip_id(np);
+	of_node_put(np);	/* drop our reference only after the last use */
+	return chip_id;
+}
+EXPORT_SYMBOL(cpu_to_chip_id);
+
 #ifdef CONFIG_PPC_PSERIES
 /*
  * Fix up the uninitialized fields in a new device node:
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 930cd8af3503..a3b64f3bf9a2 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -597,22 +597,6 @@ out:
 	return id;
 }
 
-/* Return the value of the chip-id property corresponding
- * to the given logical cpu.
- */
-int cpu_to_chip_id(int cpu)
-{
-	struct device_node *np;
-
-	np = of_get_cpu_node(cpu, NULL);
-	if (!np)
-		return -1;
-
-	of_node_put(np);
-	return of_get_ibm_chip_id(np);
-}
-EXPORT_SYMBOL(cpu_to_chip_id);
-
 /* Helper routines for cpu to core mapping */
 int cpu_core_index_of_thread(int cpu)
 {
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 8844628915dc..1cb160dc1609 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -19,6 +19,7 @@
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/machdep.h>
+#include <asm/smp.h>
 
 
 struct powernv_rng {
-- 
cgit v1.2.3