From 797179bc4fe06c89e47a9f36f886f68640b423f8 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Thu, 9 Jun 2016 10:50:43 +0100
Subject: MIPS: KVM: Fix modular KVM under QEMU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Copy __kvm_mips_vcpu_run() into unmapped memory, so that we can never
get a TLB refill exception in it when KVM is built as a module.

This was observed to happen with the host MIPS kernel running under
QEMU, due to a not entirely transparent optimisation in the QEMU TLB
handling where TLB entries replaced with TLBWR are copied to a separate
part of the TLB array. Code in those pages continue to be executable,
but those mappings persist only until the next ASID switch, even if they
are marked global.

An ASID switch happens in __kvm_mips_vcpu_run() at exception level after
switching to the guest exception base. Subsequent TLB mapped kernel
instructions just prior to switching to the guest trigger a TLB refill
exception, which enters the guest exception handlers without updating
EPC. This appears as a guest triggered TLB refill on a host kernel
mapped (host KSeg2) address, which is not handled correctly as user
(guest) mode accesses to kernel (host) segments always generate address
error exceptions.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: kvm@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: <stable@vger.kernel.org> # 3.10.x-
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/mips/include/asm/kvm_host.h |  1 +
 arch/mips/kvm/interrupt.h        |  1 +
 arch/mips/kvm/locore.S           |  1 +
 arch/mips/kvm/mips.c             | 11 ++++++++++-
 4 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'arch/mips')

diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 6733ac575da4..2d5bb133d11a 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -338,6 +338,7 @@ struct kvm_mips_tlb {
 #define KVM_MIPS_GUEST_TLB_SIZE	64
 struct kvm_vcpu_arch {
 	void *host_ebase, *guest_ebase;
+	int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
 	unsigned long host_stack;
 	unsigned long host_gp;
 
diff --git a/arch/mips/kvm/interrupt.h b/arch/mips/kvm/interrupt.h
index 4ab4bdfad703..2143884709e4 100644
--- a/arch/mips/kvm/interrupt.h
+++ b/arch/mips/kvm/interrupt.h
@@ -28,6 +28,7 @@
 #define MIPS_EXC_MAX                12
 /* XXXSL More to follow */
 
+extern char __kvm_mips_vcpu_run_end[];
 extern char mips32_exception[], mips32_exceptionEnd[];
 extern char mips32_GuestException[], mips32_GuestExceptionEnd[];
 
diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S
index 3ef03009de5f..828fcfc1cd7f 100644
--- a/arch/mips/kvm/locore.S
+++ b/arch/mips/kvm/locore.S
@@ -202,6 +202,7 @@ FEXPORT(__kvm_mips_load_k0k1)
 
 	/* Jump to guest */
 	eret
+EXPORT(__kvm_mips_vcpu_run_end)
 
 VECTOR(MIPSX(exception), unknown)
 /* Find out what mode we came from and jump to the proper handler. */
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index dc052fb5c7a2..44da5259f390 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -315,6 +315,15 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	memcpy(gebase + offset, mips32_GuestException,
 	       mips32_GuestExceptionEnd - mips32_GuestException);
 
+#ifdef MODULE
+	offset += mips32_GuestExceptionEnd - mips32_GuestException;
+	memcpy(gebase + offset, (char *)__kvm_mips_vcpu_run,
+	       __kvm_mips_vcpu_run_end - (char *)__kvm_mips_vcpu_run);
+	vcpu->arch.vcpu_run = gebase + offset;
+#else
+	vcpu->arch.vcpu_run = __kvm_mips_vcpu_run;
+#endif
+
 	/* Invalidate the icache for these ranges */
 	local_flush_icache_range((unsigned long)gebase,
 				(unsigned long)gebase + ALIGN(size, PAGE_SIZE));
@@ -404,7 +413,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	/* Disable hardware page table walking while in guest */
 	htw_stop();
 
-	r = __kvm_mips_vcpu_run(run, vcpu);
+	r = vcpu->arch.vcpu_run(run, vcpu);
 
 	/* Re-enable HTW before enabling interrupts */
 	htw_start();
-- 
cgit v1.2.3


From 7f5a1ddc792901249c2060e165bcb3ca779cde35 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Thu, 9 Jun 2016 10:50:44 +0100
Subject: MIPS: KVM: Include bit 31 in segment matches
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When faulting guest addresses are matched against guest segments with
the KVM_GUEST_KSEGX() macro, change the mask to 0xe0000000 so as to
include bit 31.

This is mainly for safety's sake, as it prevents a rogue BadVAddr in the
host kseg2/kseg3 segments (e.g. 0xC*******) after a TLB exception from
matching the guest kseg0 segment (e.g. 0x4*******), triggering an
internal KVM error instead of allowing the corresponding guest kseg0
page to be mapped into the host vmalloc space.

Such a rogue BadVAddr was observed to happen with the host MIPS kernel
running under QEMU with KVM built as a module, due to a not entirely
transparent optimisation in the QEMU TLB handling. This has already been
worked around properly in a previous commit.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: kvm@vger.kernel.org
Cc: linux-mips@linux-mips.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/mips/include/asm/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/mips')

diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 2d5bb133d11a..36a391d289aa 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -74,7 +74,7 @@
 #define KVM_GUEST_KUSEG			0x00000000UL
 #define KVM_GUEST_KSEG0			0x40000000UL
 #define KVM_GUEST_KSEG23		0x60000000UL
-#define KVM_GUEST_KSEGX(a)		((_ACAST32_(a)) & 0x60000000)
+#define KVM_GUEST_KSEGX(a)		((_ACAST32_(a)) & 0xe0000000)
 #define KVM_GUEST_CPHYSADDR(a)		((_ACAST32_(a)) & 0x1fffffff)
 
 #define KVM_GUEST_CKSEG0ADDR(a)		(KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG0)
-- 
cgit v1.2.3


From cc81e9486202345d6ca56495cf8b5f3d03fbc563 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Thu, 9 Jun 2016 10:50:45 +0100
Subject: MIPS: KVM: Don't unwind PC when emulating CACHE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a CACHE instruction is emulated by kvm_mips_emulate_cache(), the PC
is first updated to point to the next instruction, and afterwards it
falls through the "dont_update_pc" label, which rewinds the PC back to
its original address.

This works when dynamic translation of emulated instructions is enabled,
since the CACHE instruction is replaced with a SYNCI which works without
trapping, however when dynamic translation is disabled the guest hangs
on CACHE instructions as they always trap and are never stepped over.

Roughly swap the meanings of the "done" and "dont_update_pc" to match
kvm_mips_emulate_CP0(), so that "done" will roll back the PC on failure,
and "dont_update_pc" won't change PC at all (for the sake of exceptions
that have already modified the PC).

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: kvm@vger.kernel.org
Cc: linux-mips@linux-mips.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/mips/kvm/emulate.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'arch/mips')

diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 396df6eb0a12..52bec0fe2fbb 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -1666,7 +1666,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
 			cache, op, base, arch->gprs[base], offset);
 		er = EMULATE_FAIL;
 		preempt_enable();
-		goto dont_update_pc;
+		goto done;
 
 	}
 
@@ -1694,16 +1694,20 @@ skip_fault:
 		kvm_err("NO-OP CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n",
 			cache, op, base, arch->gprs[base], offset);
 		er = EMULATE_FAIL;
-		preempt_enable();
-		goto dont_update_pc;
 	}
 
 	preempt_enable();
+done:
+	/* Rollback PC only if emulation was unsuccessful */
+	if (er == EMULATE_FAIL)
+		vcpu->arch.pc = curr_pc;
 
 dont_update_pc:
-	/* Rollback PC */
-	vcpu->arch.pc = curr_pc;
-done:
+	/*
+	 * This is for exceptions whose emulation updates the PC, so do not
+	 * overwrite the PC under any circumstances
+	 */
+
 	return er;
 }
 
-- 
cgit v1.2.3


From 6df82a7b88dc9b0b519765562b005ef9196d812a Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Thu, 9 Jun 2016 10:50:46 +0100
Subject: MIPS: KVM: Fix CACHE triggered exception emulation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When emulating TLB miss / invalid exceptions during CACHE instruction
emulation, be sure to set up the correct PC and host_cp0_badvaddr state
for the kvm_mips_emlulate_tlb*_ld() function to pick up for guest EPC
and BadVAddr.

PC needs to be rewound otherwise the guest EPC will end up pointing at
the next instruction after the faulting CACHE instruction.

host_cp0_badvaddr must be set because guest CACHE instructions trap with
a Coprocessor Unusable exception, which doesn't update the host BadVAddr
as a TLB exception would.

This doesn't tend to get hit when dynamic translation of emulated
instructions is enabled, since only the first execution of each CACHE
instruction actually goes through this code path, with subsequent
executions hitting the SYNCI instruction that it gets replaced with.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: kvm@vger.kernel.org
Cc: linux-mips@linux-mips.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/mips/kvm/emulate.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/mips')

diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 52bec0fe2fbb..645c8a1982a7 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -1636,6 +1636,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
 		if (index < 0) {
 			vcpu->arch.host_cp0_entryhi = (va & VPN2_MASK);
 			vcpu->arch.host_cp0_badvaddr = va;
+			vcpu->arch.pc = curr_pc;
 			er = kvm_mips_emulate_tlbmiss_ld(cause, NULL, run,
 							 vcpu);
 			preempt_enable();
@@ -1647,6 +1648,8 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
 			 * invalid exception to the guest
 			 */
 			if (!TLB_IS_VALID(*tlb, va)) {
+				vcpu->arch.host_cp0_badvaddr = va;
+				vcpu->arch.pc = curr_pc;
 				er = kvm_mips_emulate_tlbinv_ld(cause, NULL,
 								run, vcpu);
 				preempt_enable();
-- 
cgit v1.2.3


From 65f84656ff7c24177c43652bc88cc2a06f9a48b1 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:49:01 -0700
Subject: mips: get rid of superfluous __GFP_REPEAT

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.

pte_alloc_one{_kernel}, pmd_alloc_one allocate PTE_ORDER resp.
PMD_ORDER but both are not larger than 1.  This means that this flag has
never been actually useful here because it has always been used only for
PAGE_ALLOC_COSTLY requests.

Link: http://lkml.kernel.org/r/1464599699-30131-8-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: John Crispin <blogic@openwrt.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/include/asm/pgalloc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/mips')

diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index b336037e8768..93c079a1cfc8 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -69,7 +69,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 {
 	pte_t *pte;
 
-	pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, PTE_ORDER);
+	pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER);
 
 	return pte;
 }
@@ -79,7 +79,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 {
 	struct page *pte;
 
-	pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER);
+	pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
 	if (!pte)
 		return NULL;
 	clear_highpage(pte);
@@ -113,7 +113,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	pmd_t *pmd;
 
-	pmd = (pmd_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, PMD_ORDER);
+	pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, PMD_ORDER);
 	if (pmd)
 		pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table);
 	return pmd;
-- 
cgit v1.2.3


From 6d037de90a1fd7b4879b48d4dd5c4839b271be98 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 1 Jul 2016 15:01:01 +0200
Subject: MIPS: Fix possible corruption of cache mode by mprotect.

The following testcase may result in a page table entries with a invalid
CCA field being generated:

static void *bindstack;

static int sysrqfd;

static void protect_low(int protect)
{
	mprotect(bindstack, BINDSTACK_SIZE, protect);
}

static void sigbus_handler(int signal, siginfo_t * info, void *context)
{
	void *addr = info->si_addr;

	write(sysrqfd, "x", 1);

	printf("sigbus, fault address %p (should not happen, but might)\n",
	       addr);
	abort();
}

static void run_bind_test(void)
{
	unsigned int *p = bindstack;

	p[0] = 0xf001f001;

	write(sysrqfd, "x", 1);

	/* Set trap on access to p[0] */
	protect_low(PROT_NONE);

	write(sysrqfd, "x", 1);

	/* Clear trap on access to p[0] */
	protect_low(PROT_READ | PROT_WRITE | PROT_EXEC);

	write(sysrqfd, "x", 1);

	/* Check the contents of p[0] */
	if (p[0] != 0xf001f001) {
		write(sysrqfd, "x", 1);

		/* Reached, but shouldn't be */
		printf("badness, shouldn't happen but does\n");
		abort();
	}
}

int main(void)
{
	struct sigaction sa;

	sysrqfd = open("/proc/sysrq-trigger", O_WRONLY);

	if (sigprocmask(SIG_BLOCK, NULL, &sa.sa_mask)) {
		perror("sigprocmask");
		return 0;
	}

	sa.sa_sigaction = sigbus_handler;
	sa.sa_flags = SA_SIGINFO | SA_NODEFER | SA_RESTART;
	if (sigaction(SIGBUS, &sa, NULL)) {
		perror("sigaction");
		return 0;
	}

	bindstack = mmap(NULL,
			 BINDSTACK_SIZE,
			 PROT_READ | PROT_WRITE | PROT_EXEC,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (bindstack == MAP_FAILED) {
		perror("mmap bindstack");
		return 0;
	}

	printf("bindstack: %p\n", bindstack);

	run_bind_test();

	printf("done\n");

	return 0;
}

There are multiple ingredients for this:

 1) PAGE_NONE is defined to _CACHE_CACHABLE_NONCOHERENT, which is CCA 3
    on all platforms except SB1 where it's CCA 5.
 2) _page_cachable_default must have bits set which are not set
    _CACHE_CACHABLE_NONCOHERENT.
 3) Either the defective version of pte_modify for XPA or the standard
    version must be in used.  However pte_modify for the 36 bit address
    space support is no affected.

In that case additional bits in the final CCA mode may generate an invalid
value for the CCA field.  On the R10000 system where this was tracked
down for example a CCA 7 has been observed, which is Uncached Accelerated.

Fixed by:

 1) Using the proper CCA mode for PAGE_NONE just like for all the other
    PAGE_* pte/pmd bits.
 2) Fix the two affected variants of pte_modify.

Further code inspection also shows the same issue to exist in pmd_modify
which would affect huge page systems.

Issue in pte_modify tracked down by Alastair Bridgewater, PAGE_NONE
and pmd_modify issue found by me.

The history of this goes back beyond Linus' git history.  Chris Dearman's
commit 351336929ccf222ae38ff0cb7a8dd5fd5c6236a0 ("[MIPS] Allow setting of
the cache attribute at run time.") missed the opportunity to fix this
but it was originally introduced in lmo commit
d523832cf12007b3242e50bb77d0c9e63e0b6518 ("Missing from last commit.")
and 32cc38229ac7538f2346918a09e75413e8861f87 ("New configuration option
CONFIG_MIPS_UNCACHED.")

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Reported-by: Alastair Bridgewater <alastair.bridgewater@gmail.com>
---
 arch/mips/include/asm/pgtable.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'arch/mips')

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index a6b611f1da43..f53816744d60 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -24,7 +24,7 @@ struct mm_struct;
 struct vm_area_struct;
 
 #define PAGE_NONE	__pgprot(_PAGE_PRESENT | _PAGE_NO_READ | \
-				 _CACHE_CACHABLE_NONCOHERENT)
+				 _page_cachable_default)
 #define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_WRITE | \
 				 _page_cachable_default)
 #define PAGE_COPY	__pgprot(_PAGE_PRESENT | _PAGE_NO_EXEC | \
@@ -476,7 +476,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	pte.pte_low  &= (_PAGE_MODIFIED | _PAGE_ACCESSED | _PFNX_MASK);
 	pte.pte_high &= (_PFN_MASK | _CACHE_MASK);
 	pte.pte_low  |= pgprot_val(newprot) & ~_PFNX_MASK;
-	pte.pte_high |= pgprot_val(newprot) & ~_PFN_MASK;
+	pte.pte_high |= pgprot_val(newprot) & ~(_PFN_MASK | _CACHE_MASK);
 	return pte;
 }
 #elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
@@ -491,7 +491,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #else
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+	return __pte((pte_val(pte) & _PAGE_CHG_MASK) |
+		     (pgprot_val(newprot) & ~_PAGE_CHG_MASK));
 }
 #endif
 
@@ -632,7 +633,8 @@ static inline struct page *pmd_page(pmd_t pmd)
 
 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 {
-	pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | pgprot_val(newprot);
+	pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) |
+		       (pgprot_val(newprot) & ~_PAGE_CHG_MASK);
 	return pmd;
 }
 
-- 
cgit v1.2.3