summary | refs | log | tree | commit | diff | stats
path: root/arch
diff options
context:
space:
mode:
author Michael Ellerman <mpe@ellerman.id.au> 2022-03-28 12:11:24 +0200
committer Michael Ellerman <mpe@ellerman.id.au> 2022-03-28 12:11:24 +0200
commit 1aa940d97f8cee25d48faf05d18eddfbc2fe9554 (patch)
tree 8465d3b8a83446155779cc13fb74123ecb5da87e /arch
parent powerpc: restore removed #endif (diff)
parent KVM: PPC: Use KVM_CAP_PPC_AIL_MODE_3 (diff)
download linux-1aa940d97f8cee25d48faf05d18eddfbc2fe9554.tar.xz
linux-1aa940d97f8cee25d48faf05d18eddfbc2fe9554.zip
Merge branch 'topic/ppc-kvm' into next
Merge some more commits from our KVM topic branch. In particular this brings in some commits that depend on a new capability that was merged via the KVM tree for v5.18.
Diffstat (limited to 'arch')
-rw-r--r-- arch/powerpc/include/asm/setup.h 2
-rw-r--r-- arch/powerpc/kernel/exceptions-64s.S 4
-rw-r--r-- arch/powerpc/kernel/setup_64.c 28
-rw-r--r-- arch/powerpc/kvm/Kconfig 9
-rw-r--r-- arch/powerpc/kvm/book3s_hv.c 41
-rw-r--r-- arch/powerpc/kvm/book3s_pr.c 26
-rw-r--r-- arch/powerpc/kvm/book3s_pr_papr.c 20
-rw-r--r-- arch/powerpc/kvm/powerpc.c 17
-rw-r--r-- arch/powerpc/platforms/pseries/setup.c 13
9 files changed, 142 insertions, 18 deletions
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 049ca26893e6..8fa37ef5da4d 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -28,11 +28,13 @@ void setup_panic(void);
#define ARCH_PANIC_TIMEOUT 180
#ifdef CONFIG_PPC_PSERIES
+extern bool pseries_reloc_on_exception(void);
extern bool pseries_enable_reloc_on_exc(void);
extern void pseries_disable_reloc_on_exc(void);
extern void pseries_big_endian_exceptions(void);
void __init pseries_little_endian_exceptions(void);
#else
+static inline bool pseries_reloc_on_exception(void) { return false; }
static inline bool pseries_enable_reloc_on_exc(void) { return false; }
static inline void pseries_disable_reloc_on_exc(void) {}
static inline void pseries_big_endian_exceptions(void) {}
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 55caeee37c08..b66dd6f775a4 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -809,6 +809,10 @@ __start_interrupts:
* - MSR_EE|MSR_RI is clear (no reentrant exceptions)
* - Standard kernel environment is set up (stack, paca, etc)
*
+ * KVM:
+ * These interrupts do not elevate HV 0->1, so HV is not involved. PR KVM
+ * ensures that FSCR[SCV] is disabled whenever it has to force AIL off.
+ *
* Call convention:
*
* syscall register convention is in Documentation/powerpc/syscall64-abi.rst
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e547066a06aa..a96f05063bc9 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -196,6 +196,34 @@ static void __init configure_exceptions(void)
/* Under a PAPR hypervisor, we need hypercalls */
if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+ /*
+ * - PR KVM does not support AIL mode interrupts in the host
+ * while a PR guest is running.
+ *
+ * - SCV system call interrupt vectors are only implemented for
+ * AIL mode interrupts.
+ *
+ * - On pseries, AIL mode can only be enabled and disabled
+ * system-wide so when a PR VM is created on a pseries host,
+ * all CPUs of the host are set to AIL=0 mode.
+ *
+ * - Therefore host CPUs must not execute scv while a PR VM
+ * exists.
+ *
+ * - SCV support cannot be disabled dynamically because the
+ * feature is advertised to host userspace. Disabling the
+ * facility and emulating it would be possible but is not
+ * implemented.
+ *
+ * - So SCV support is blanket disabled if PR KVM could possibly
+ * run. That is, PR support compiled in, booting on pseries
+ * with hash MMU.
+ */
+ if (IS_ENABLED(CONFIG_KVM_BOOK3S_PR_POSSIBLE) && !radix_enabled()) {
+ init_task.thread.fscr &= ~FSCR_SCV;
+ cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
+ }
+
/* Enable AIL if possible */
if (!pseries_enable_reloc_on_exc()) {
init_task.thread.fscr &= ~FSCR_SCV;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 18e58085447c..ddd88179110a 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -112,12 +112,21 @@ config KVM_BOOK3S_64_PR
guest in user mode (problem state) and emulating all
privileged instructions and registers.
+ This is only available for hash MMU mode and only supports
+ guests that use hash MMU mode.
+
This is not as fast as using hypervisor mode, but works on
machines where hypervisor mode is not available or not usable,
and can emulate processors that are different from the host
processor, including emulating 32-bit processors on a 64-bit
host.
+ Selecting this option will cause the SCV facility to be
+ disabled when the kernel is booted on the pseries platform in
+ hash MMU mode (regardless of PR VMs running). When any PR VMs
+ are running, "AIL" mode is disabled which may slow interrupts
+ and system calls on the host.
+
config KVM_BOOK3S_HV_EXIT_TIMING
bool "Detailed timing for hypervisor real-mode code"
depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index c886557638a1..6fa518f6501d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -225,6 +225,13 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
int cpu;
struct rcuwait *waitp;
+ /*
+ * rcuwait_wake_up contains smp_mb() which orders prior stores that
+ * create pending work vs below loads of cpu fields. The other side
+ * is the barrier in vcpu run that orders setting the cpu fields vs
+ * testing for pending work.
+ */
+
waitp = kvm_arch_vcpu_get_wait(vcpu);
if (rcuwait_wake_up(waitp))
++vcpu->stat.generic.halt_wakeup;
@@ -1089,7 +1096,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
break;
}
tvcpu->arch.prodded = 1;
- smp_mb();
+ smp_mb(); /* This orders prodded store vs ceded load */
if (tvcpu->arch.ceded)
kvmppc_fast_vcpu_kick_hv(tvcpu);
break;
@@ -3766,6 +3773,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
pvc = core_info.vc[sub];
pvc->pcpu = pcpu + thr;
for_each_runnable_thread(i, vcpu, pvc) {
+ /*
+ * XXX: is kvmppc_start_thread called too late here?
+ * It updates vcpu->cpu and vcpu->arch.thread_cpu
+ * which are used by kvmppc_fast_vcpu_kick_hv(), but
+ * kick is called after new exceptions become available
+ * and exceptions are checked earlier than here, by
+ * kvmppc_core_prepare_to_enter.
+ */
kvmppc_start_thread(vcpu, pvc);
kvmppc_create_dtl_entry(vcpu, pvc);
trace_kvm_guest_enter(vcpu);
@@ -4487,6 +4502,21 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
if (need_resched() || !kvm->arch.mmu_ready)
goto out;
+ vcpu->cpu = pcpu;
+ vcpu->arch.thread_cpu = pcpu;
+ vc->pcpu = pcpu;
+ local_paca->kvm_hstate.kvm_vcpu = vcpu;
+ local_paca->kvm_hstate.ptid = 0;
+ local_paca->kvm_hstate.fake_suspend = 0;
+
+ /*
+ * Orders set cpu/thread_cpu vs testing for pending interrupts and
+ * doorbells below. The other side is when these fields are set vs
+ * kvmppc_fast_vcpu_kick_hv reading the cpu/thread_cpu fields to
+ * kick a vCPU to notice the pending interrupt.
+ */
+ smp_mb();
+
if (!nested) {
kvmppc_core_prepare_to_enter(vcpu);
if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
@@ -4506,13 +4536,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
tb = mftb();
- vcpu->cpu = pcpu;
- vcpu->arch.thread_cpu = pcpu;
- vc->pcpu = pcpu;
- local_paca->kvm_hstate.kvm_vcpu = vcpu;
- local_paca->kvm_hstate.ptid = 0;
- local_paca->kvm_hstate.fake_suspend = 0;
-
__kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0);
trace_kvm_guest_enter(vcpu);
@@ -4614,6 +4637,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
run->exit_reason = KVM_EXIT_INTR;
vcpu->arch.ret = -EINTR;
out:
+ vcpu->cpu = -1;
+ vcpu->arch.thread_cpu = -1;
powerpc_local_irq_pmu_restore(flags);
preempt_enable();
goto done;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 34a801c3604a..7bf9e6ca5c2d 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -137,12 +137,15 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
svcpu->in_use = 0;
svcpu_put(svcpu);
-#endif
/* Disable AIL if supported */
- if (cpu_has_feature(CPU_FTR_HVMODE) &&
- cpu_has_feature(CPU_FTR_ARCH_207S))
- mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL);
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL);
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV))
+ mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) & ~FSCR_SCV);
+ }
+#endif
vcpu->cpu = smp_processor_id();
#ifdef CONFIG_PPC_BOOK3S_32
@@ -165,6 +168,14 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
svcpu_put(svcpu);
+
+ /* Enable AIL if supported */
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3);
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV))
+ mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) | FSCR_SCV);
+ }
#endif
if (kvmppc_is_split_real(vcpu))
@@ -174,11 +185,6 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
kvmppc_save_tm_pr(vcpu);
- /* Enable AIL if supported */
- if (cpu_has_feature(CPU_FTR_HVMODE) &&
- cpu_has_feature(CPU_FTR_ARCH_207S))
- mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3);
-
vcpu->cpu = -1;
}
@@ -1037,6 +1043,8 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac)
void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr)
{
+ if (fscr & FSCR_SCV)
+ fscr &= ~FSCR_SCV; /* SCV must not be enabled */
if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) {
/* TAR got dropped, drop it in shadow too */
kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index 1f10e7dfcdd0..dc4f51ac84bc 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -281,6 +281,22 @@ static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
+static int kvmppc_h_pr_set_mode(struct kvm_vcpu *vcpu)
+{
+ unsigned long mflags = kvmppc_get_gpr(vcpu, 4);
+ unsigned long resource = kvmppc_get_gpr(vcpu, 5);
+
+ if (resource == H_SET_MODE_RESOURCE_ADDR_TRANS_MODE) {
+ /* KVM PR does not provide AIL!=0 to guests */
+ if (mflags == 0)
+ kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+ else
+ kvmppc_set_gpr(vcpu, 3, H_UNSUPPORTED_FLAG_START - 63);
+ return EMULATE_DONE;
+ }
+ return EMULATE_FAIL;
+}
+
#ifdef CONFIG_SPAPR_TCE_IOMMU
static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
{
@@ -384,6 +400,8 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
return kvmppc_h_pr_logical_ci_load(vcpu);
case H_LOGICAL_CI_STORE:
return kvmppc_h_pr_logical_ci_store(vcpu);
+ case H_SET_MODE:
+ return kvmppc_h_pr_set_mode(vcpu);
case H_XIRR:
case H_CPPR:
case H_EOI:
@@ -421,6 +439,7 @@ int kvmppc_hcall_impl_pr(unsigned long cmd)
case H_CEDE:
case H_LOGICAL_CI_LOAD:
case H_LOGICAL_CI_STORE:
+ case H_SET_MODE:
#ifdef CONFIG_KVM_XICS
case H_XIRR:
case H_CPPR:
@@ -447,6 +466,7 @@ static unsigned int default_hcall_list[] = {
H_BULK_REMOVE,
H_PUT_TCE,
H_CEDE,
+ H_SET_MODE,
#ifdef CONFIG_KVM_XICS
H_XIRR,
H_CPPR,
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 9772b176e406..875c30c12db0 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -705,6 +705,23 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 1;
break;
#endif
+ case KVM_CAP_PPC_AIL_MODE_3:
+ r = 0;
+ /*
+ * KVM PR, POWER7, and some POWER9s don't support AIL=3 mode.
+ * The POWER9s can support it if the guest runs in hash mode,
+ * but QEMU doesn't necessarily query the capability in time.
+ */
+ if (hv_enabled) {
+ if (kvmhv_on_pseries()) {
+ if (pseries_reloc_on_exception())
+ r = 1;
+ } else if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+ !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
+ r = 1;
+ }
+ }
+ break;
default:
r = 0;
break;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 069d7b3bb142..955ff8aa1644 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -353,6 +353,14 @@ static void pseries_lpar_idle(void)
pseries_idle_epilog();
}
+static bool pseries_reloc_on_exception_enabled;
+
+bool pseries_reloc_on_exception(void)
+{
+ return pseries_reloc_on_exception_enabled;
+}
+EXPORT_SYMBOL_GPL(pseries_reloc_on_exception);
+
/*
* Enable relocation on during exceptions. This has partition wide scope and
* may take a while to complete, if it takes longer than one second we will
@@ -377,6 +385,7 @@ bool pseries_enable_reloc_on_exc(void)
" on exceptions: %ld\n", rc);
return false;
}
+ pseries_reloc_on_exception_enabled = true;
return true;
}
@@ -404,7 +413,9 @@ void pseries_disable_reloc_on_exc(void)
break;
mdelay(get_longbusy_msecs(rc));
}
- if (rc != H_SUCCESS)
+ if (rc == H_SUCCESS)
+ pseries_reloc_on_exception_enabled = false;
+ else
pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n",
rc);
}